initial commit
This commit is contained in:
commit
9df0f6ad3a
1
.Lexer.ml.tag
Normal file
1
.Lexer.ml.tag
Normal file
@ -0,0 +1 @@
|
|||||||
|
ocamlc: -w -42
|
0
.LexerMain.tag
Normal file
0
.LexerMain.tag
Normal file
1
.Parser.mly.tag
Normal file
1
.Parser.mly.tag
Normal file
@ -0,0 +1 @@
|
|||||||
|
--explain --external-tokens LexToken --base Parser ParToken.mly
|
0
.ParserMain.tag
Normal file
0
.ParserMain.tag
Normal file
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
_build/*
|
||||||
|
*/_build
|
||||||
|
*~
|
||||||
|
.merlin
|
||||||
|
*/.merlin
|
||||||
|
*.install
|
2
.links
Normal file
2
.links
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
$HOME/git/OCaml-build/Makefile
|
||||||
|
$HOME/git/OCaml-build/Makefile.cfg
|
916
AST.ml
Normal file
916
AST.ml
Normal file
@ -0,0 +1,916 @@
|
|||||||
|
(* Abstract Syntax Tree (AST) for Ligo *)
|
||||||
|
|
||||||
|
open Utils
|
||||||
|
|
||||||
|
(* Regions
|
||||||
|
|
||||||
|
The AST carries all the regions where tokens have been found by the
|
||||||
|
lexer, plus additional regions corresponding to whole subtrees
|
||||||
|
(like entire expressions, patterns etc.). These regions are needed
|
||||||
|
for error reporting and source-to-source transformations. To make
|
||||||
|
these pervasive regions more legible, we define singleton types for
|
||||||
|
the symbols, keywords etc. with suggestive names like "kwd_and"
|
||||||
|
denoting the _region_ of the occurrence of the keyword "and".
|
||||||
|
*)
|
||||||
|
|
||||||
|
type 'a reg = 'a Region.reg
|
||||||
|
|
||||||
|
let rec last to_region = function
|
||||||
|
[] -> Region.ghost
|
||||||
|
| [x] -> to_region x
|
||||||
|
| _::t -> last to_region t
|
||||||
|
|
||||||
|
let nseq_to_region to_region (hd,tl) =
|
||||||
|
Region.cover (to_region hd) (last to_region tl)
|
||||||
|
|
||||||
|
let nsepseq_to_region to_region (hd,tl) =
|
||||||
|
let reg (_,item) = to_region item in
|
||||||
|
Region.cover (to_region hd) (last reg tl)
|
||||||
|
|
||||||
|
let sepseq_to_region to_region = function
|
||||||
|
None -> Region.ghost
|
||||||
|
| Some seq -> nsepseq_to_region to_region seq
|
||||||
|
|
||||||
|
(* Keywords of Ligo *)
|
||||||
|
|
||||||
|
type kwd_begin = Region.t
|
||||||
|
type kwd_const = Region.t
|
||||||
|
type kwd_down = Region.t
|
||||||
|
type kwd_if = Region.t
|
||||||
|
type kwd_in = Region.t
|
||||||
|
type kwd_is = Region.t
|
||||||
|
type kwd_for = Region.t
|
||||||
|
type kwd_function = Region.t
|
||||||
|
type kwd_parameter = Region.t
|
||||||
|
type kwd_storage = Region.t
|
||||||
|
type kwd_type = Region.t
|
||||||
|
type kwd_of = Region.t
|
||||||
|
type kwd_operations = Region.t
|
||||||
|
type kwd_var = Region.t
|
||||||
|
type kwd_end = Region.t
|
||||||
|
type kwd_then = Region.t
|
||||||
|
type kwd_else = Region.t
|
||||||
|
type kwd_match = Region.t
|
||||||
|
type kwd_procedure = Region.t
|
||||||
|
type kwd_null = Region.t
|
||||||
|
type kwd_record = Region.t
|
||||||
|
type kwd_step = Region.t
|
||||||
|
type kwd_to = Region.t
|
||||||
|
type kwd_mod = Region.t
|
||||||
|
type kwd_not = Region.t
|
||||||
|
type kwd_while = Region.t
|
||||||
|
type kwd_with = Region.t
|
||||||
|
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
|
type c_False = Region.t
|
||||||
|
type c_None = Region.t
|
||||||
|
type c_Some = Region.t
|
||||||
|
type c_True = Region.t
|
||||||
|
type c_Unit = Region.t
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
type semi = Region.t
|
||||||
|
type comma = Region.t
|
||||||
|
type lpar = Region.t
|
||||||
|
type rpar = Region.t
|
||||||
|
type lbrace = Region.t
|
||||||
|
type rbrace = Region.t
|
||||||
|
type lbracket = Region.t
|
||||||
|
type rbracket = Region.t
|
||||||
|
type cons = Region.t
|
||||||
|
type vbar = Region.t
|
||||||
|
type arrow = Region.t
|
||||||
|
type asgnmnt = Region.t
|
||||||
|
type equal = Region.t
|
||||||
|
type colon = Region.t
|
||||||
|
type bool_or = Region.t
|
||||||
|
type bool_and = Region.t
|
||||||
|
type lt = Region.t
|
||||||
|
type leq = Region.t
|
||||||
|
type gt = Region.t
|
||||||
|
type geq = Region.t
|
||||||
|
type neq = Region.t
|
||||||
|
type plus = Region.t
|
||||||
|
type minus = Region.t
|
||||||
|
type slash = Region.t
|
||||||
|
type times = Region.t
|
||||||
|
type dot = Region.t
|
||||||
|
type wild = Region.t
|
||||||
|
type cat = Region.t
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
|
type eof = Region.t
|
||||||
|
|
||||||
|
(* Literals *)
|
||||||
|
|
||||||
|
type variable = string reg
|
||||||
|
type fun_name = string reg
|
||||||
|
type type_name = string reg
|
||||||
|
type field_name = string reg
|
||||||
|
type map_name = string reg
|
||||||
|
type constr = string reg
|
||||||
|
|
||||||
|
(* Comma-separated non-empty lists *)
|
||||||
|
|
||||||
|
type 'a csv = ('a, comma) nsepseq
|
||||||
|
|
||||||
|
(* Bar-separated non-empty lists *)
|
||||||
|
|
||||||
|
type 'a bsv = ('a, vbar) nsepseq
|
||||||
|
|
||||||
|
(* Parentheses *)
|
||||||
|
|
||||||
|
type 'a par = (lpar * 'a * rpar) reg
|
||||||
|
|
||||||
|
(* Brackets compounds *)
|
||||||
|
|
||||||
|
type 'a brackets = (lbracket * 'a * rbracket) reg
|
||||||
|
|
||||||
|
(* Braced compounds *)
|
||||||
|
|
||||||
|
type 'a braces = (lbrace * 'a * rbrace) reg
|
||||||
|
|
||||||
|
(* The Abstract Syntax Tree *)
|
||||||
|
|
||||||
|
type t = <
|
||||||
|
types : type_decl list;
|
||||||
|
parameter : parameter_decl;
|
||||||
|
storage : storage_decl;
|
||||||
|
operations : operations_decl;
|
||||||
|
lambdas : lambda_decl list;
|
||||||
|
block : block reg;
|
||||||
|
eof : eof
|
||||||
|
>
|
||||||
|
|
||||||
|
and ast = t
|
||||||
|
|
||||||
|
and parameter_decl = (kwd_parameter * variable * colon * type_expr) reg
|
||||||
|
|
||||||
|
and storage_decl = (kwd_storage * type_expr) reg
|
||||||
|
|
||||||
|
and operations_decl = (kwd_operations * type_expr) reg
|
||||||
|
|
||||||
|
(* Type declarations *)
|
||||||
|
|
||||||
|
and type_decl = (kwd_type * type_name * kwd_is * type_expr) reg
|
||||||
|
|
||||||
|
and type_expr =
|
||||||
|
Prod of cartesian
|
||||||
|
| Sum of (variant, vbar) nsepseq reg
|
||||||
|
| Record of record_type
|
||||||
|
| TypeApp of (type_name * type_tuple) reg
|
||||||
|
| ParType of type_expr par
|
||||||
|
| TAlias of variable
|
||||||
|
|
||||||
|
and cartesian = (type_expr, times) nsepseq reg
|
||||||
|
|
||||||
|
and variant = (constr * kwd_of * cartesian) reg
|
||||||
|
|
||||||
|
and record_type = (kwd_record * field_decls * kwd_end) reg
|
||||||
|
|
||||||
|
and field_decls = (field_decl, semi) nsepseq
|
||||||
|
|
||||||
|
and field_decl = (variable * colon * type_expr) reg
|
||||||
|
|
||||||
|
and type_tuple = (type_name, comma) nsepseq par
|
||||||
|
|
||||||
|
(* Function and procedure declarations *)
|
||||||
|
|
||||||
|
and lambda_decl =
|
||||||
|
FunDecl of fun_decl reg
|
||||||
|
| ProcDecl of proc_decl reg
|
||||||
|
|
||||||
|
and fun_decl = <
|
||||||
|
kwd_function : kwd_function;
|
||||||
|
var : variable;
|
||||||
|
param : parameters;
|
||||||
|
colon : colon;
|
||||||
|
ret_type : type_expr;
|
||||||
|
kwd_is : kwd_is;
|
||||||
|
body : block reg;
|
||||||
|
kwd_with : kwd_with;
|
||||||
|
return : expr
|
||||||
|
>
|
||||||
|
|
||||||
|
and proc_decl = <
|
||||||
|
kwd_procedure : kwd_procedure;
|
||||||
|
var : variable;
|
||||||
|
param : parameters;
|
||||||
|
kwd_is : kwd_is;
|
||||||
|
body : block reg
|
||||||
|
>
|
||||||
|
|
||||||
|
and parameters = (param_decl, semi) nsepseq par
|
||||||
|
|
||||||
|
and param_decl = (var_kind * variable * colon * type_expr) reg
|
||||||
|
|
||||||
|
and var_kind =
|
||||||
|
Mutable of kwd_var
|
||||||
|
| Const of kwd_const
|
||||||
|
|
||||||
|
and block = <
|
||||||
|
decls : value_decls;
|
||||||
|
opening : kwd_begin;
|
||||||
|
instr : instructions;
|
||||||
|
close : kwd_end
|
||||||
|
>
|
||||||
|
|
||||||
|
and value_decls = (var_decl reg, semi) sepseq reg
|
||||||
|
|
||||||
|
and var_decl = <
|
||||||
|
kind : var_kind;
|
||||||
|
var : variable;
|
||||||
|
colon : colon;
|
||||||
|
vtype : type_expr;
|
||||||
|
setter : Region.t; (* "=" or ":=" *)
|
||||||
|
init : expr
|
||||||
|
>
|
||||||
|
|
||||||
|
and instructions = (instruction, semi) nsepseq reg
|
||||||
|
|
||||||
|
and instruction =
|
||||||
|
Single of single_instr
|
||||||
|
| Block of block reg
|
||||||
|
|
||||||
|
and single_instr =
|
||||||
|
Cond of conditional reg
|
||||||
|
| Match of match_instr reg
|
||||||
|
| Asgnmnt of asgnmnt_instr
|
||||||
|
| Loop of loop
|
||||||
|
| ProcCall of fun_call
|
||||||
|
| Null of kwd_null
|
||||||
|
|
||||||
|
and conditional = <
|
||||||
|
kwd_if : kwd_if;
|
||||||
|
test : expr;
|
||||||
|
kwd_then : kwd_then;
|
||||||
|
ifso : instruction;
|
||||||
|
kwd_else : kwd_else;
|
||||||
|
ifnot : instruction
|
||||||
|
>
|
||||||
|
|
||||||
|
and match_instr = <
|
||||||
|
kwd_match : kwd_match;
|
||||||
|
expr : expr;
|
||||||
|
kwd_with : kwd_with;
|
||||||
|
cases : cases;
|
||||||
|
kwd_end : kwd_end
|
||||||
|
>
|
||||||
|
|
||||||
|
and cases = (case, vbar) nsepseq reg
|
||||||
|
|
||||||
|
and case = (pattern * arrow * instruction) reg
|
||||||
|
|
||||||
|
and asgnmnt_instr = (variable * asgnmnt * expr) reg
|
||||||
|
|
||||||
|
and loop =
|
||||||
|
While of while_loop
|
||||||
|
| For of for_loop
|
||||||
|
|
||||||
|
and while_loop = (kwd_while * expr * block reg) reg
|
||||||
|
|
||||||
|
and for_loop =
|
||||||
|
ForInt of for_int reg
|
||||||
|
| ForCollect of for_collect reg
|
||||||
|
|
||||||
|
and for_int = <
|
||||||
|
kwd_for : kwd_for;
|
||||||
|
asgnmnt : asgnmnt_instr;
|
||||||
|
down : kwd_down option;
|
||||||
|
kwd_to : kwd_to;
|
||||||
|
bound : expr;
|
||||||
|
step : (kwd_step * expr) option;
|
||||||
|
block : block reg
|
||||||
|
>
|
||||||
|
|
||||||
|
and for_collect = <
|
||||||
|
kwd_for : kwd_for;
|
||||||
|
var : variable;
|
||||||
|
bind_to : (arrow * variable) option;
|
||||||
|
kwd_in : kwd_in;
|
||||||
|
expr : expr;
|
||||||
|
block : block reg
|
||||||
|
>
|
||||||
|
|
||||||
|
(* Expressions *)
|
||||||
|
|
||||||
|
and expr =
|
||||||
|
Or of (expr * bool_or * expr) reg
|
||||||
|
| And of (expr * bool_and * expr) reg
|
||||||
|
| Lt of (expr * lt * expr) reg
|
||||||
|
| Leq of (expr * leq * expr) reg
|
||||||
|
| Gt of (expr * gt * expr) reg
|
||||||
|
| Geq of (expr * geq * expr) reg
|
||||||
|
| Equal of (expr * equal * expr) reg
|
||||||
|
| Neq of (expr * neq * expr) reg
|
||||||
|
| Cat of (expr * cat * expr) reg
|
||||||
|
| Cons of (expr * cons * expr) reg
|
||||||
|
| Add of (expr * plus * expr) reg
|
||||||
|
| Sub of (expr * minus * expr) reg
|
||||||
|
| Mult of (expr * times * expr) reg
|
||||||
|
| Div of (expr * slash * expr) reg
|
||||||
|
| Mod of (expr * kwd_mod * expr) reg
|
||||||
|
| Neg of (minus * expr) reg
|
||||||
|
| Not of (kwd_not * expr) reg
|
||||||
|
| Int of (Lexer.lexeme * Z.t) reg
|
||||||
|
| Var of Lexer.lexeme reg
|
||||||
|
| String of Lexer.lexeme reg
|
||||||
|
| Bytes of (Lexer.lexeme * MBytes.t) reg
|
||||||
|
| False of c_False
|
||||||
|
| True of c_True
|
||||||
|
| Unit of c_Unit
|
||||||
|
| Tuple of tuple
|
||||||
|
| List of (expr, comma) nsepseq brackets
|
||||||
|
| EmptyList of empty_list
|
||||||
|
| Set of (expr, comma) nsepseq braces
|
||||||
|
| EmptySet of empty_set
|
||||||
|
| NoneExpr of none_expr
|
||||||
|
| FunCall of fun_call
|
||||||
|
| ConstrApp of constr_app
|
||||||
|
| SomeApp of (c_Some * arguments) reg
|
||||||
|
| MapLookUp of map_lookup reg
|
||||||
|
| ParExpr of expr par
|
||||||
|
|
||||||
|
and tuple = (expr, comma) nsepseq par
|
||||||
|
|
||||||
|
and empty_list =
|
||||||
|
(lbracket * rbracket * colon * type_expr) par
|
||||||
|
|
||||||
|
and empty_set =
|
||||||
|
(lbrace * rbrace * colon * type_expr) par
|
||||||
|
|
||||||
|
and none_expr =
|
||||||
|
(c_None * colon * type_expr) par
|
||||||
|
|
||||||
|
and fun_call = (fun_name * arguments) reg
|
||||||
|
|
||||||
|
and arguments = tuple
|
||||||
|
|
||||||
|
and constr_app = (constr * arguments) reg
|
||||||
|
|
||||||
|
and map_lookup = <
|
||||||
|
map_name : variable;
|
||||||
|
selector : dot;
|
||||||
|
index : expr brackets
|
||||||
|
>
|
||||||
|
|
||||||
|
(* Patterns *)
|
||||||
|
|
||||||
|
and pattern = (core_pattern, cons) nsepseq reg
|
||||||
|
|
||||||
|
and core_pattern =
|
||||||
|
PVar of Lexer.lexeme reg
|
||||||
|
| PWild of wild
|
||||||
|
| PInt of (Lexer.lexeme * Z.t) reg
|
||||||
|
| PBytes of (Lexer.lexeme * MBytes.t) reg
|
||||||
|
| PString of Lexer.lexeme reg
|
||||||
|
| PUnit of c_Unit
|
||||||
|
| PFalse of c_False
|
||||||
|
| PTrue of c_True
|
||||||
|
| PNone of c_None
|
||||||
|
| PSome of (c_Some * core_pattern par) reg
|
||||||
|
| PList of list_pattern
|
||||||
|
| PTuple of (core_pattern, comma) nsepseq par
|
||||||
|
|
||||||
|
and list_pattern =
|
||||||
|
Sugar of (core_pattern, comma) sepseq brackets
|
||||||
|
| Raw of (core_pattern * cons * pattern) par
|
||||||
|
|
||||||
|
(* Projecting regions *)
|
||||||
|
|
||||||
|
open Region
|
||||||
|
|
||||||
|
let type_expr_to_region = function
|
||||||
|
Prod node -> node.region
|
||||||
|
| Sum node -> node.region
|
||||||
|
| Record node -> node.region
|
||||||
|
| TypeApp node -> node.region
|
||||||
|
| ParType node -> node.region
|
||||||
|
| TAlias node -> node.region
|
||||||
|
|
||||||
|
let expr_to_region = function
|
||||||
|
Or {region; _}
|
||||||
|
| And {region; _}
|
||||||
|
| Lt {region; _}
|
||||||
|
| Leq {region; _}
|
||||||
|
| Gt {region; _}
|
||||||
|
| Geq {region; _}
|
||||||
|
| Equal {region; _}
|
||||||
|
| Neq {region; _}
|
||||||
|
| Cat {region; _}
|
||||||
|
| Cons {region; _}
|
||||||
|
| Add {region; _}
|
||||||
|
| Sub {region; _}
|
||||||
|
| Mult {region; _}
|
||||||
|
| Div {region; _}
|
||||||
|
| Mod {region; _}
|
||||||
|
| Neg {region; _}
|
||||||
|
| Not {region; _}
|
||||||
|
| Int {region; _}
|
||||||
|
| Var {region; _}
|
||||||
|
| String {region; _}
|
||||||
|
| Bytes {region; _}
|
||||||
|
| False region
|
||||||
|
| True region
|
||||||
|
| Unit region
|
||||||
|
| Tuple {region; _}
|
||||||
|
| List {region; _}
|
||||||
|
| EmptyList {region; _}
|
||||||
|
| Set {region; _}
|
||||||
|
| EmptySet {region; _}
|
||||||
|
| NoneExpr {region; _}
|
||||||
|
| FunCall {region; _}
|
||||||
|
| ConstrApp {region; _}
|
||||||
|
| SomeApp {region; _}
|
||||||
|
| MapLookUp {region; _}
|
||||||
|
| ParExpr {region; _} -> region
|
||||||
|
|
||||||
|
let var_kind_to_region = function
|
||||||
|
Mutable region
|
||||||
|
| Const region -> region
|
||||||
|
|
||||||
|
let instr_to_region = function
|
||||||
|
Single Cond {region;_}
|
||||||
|
| Single Match {region; _}
|
||||||
|
| Single Asgnmnt {region; _}
|
||||||
|
| Single Loop While {region; _}
|
||||||
|
| Single Loop For ForInt {region; _}
|
||||||
|
| Single Loop For ForCollect {region; _}
|
||||||
|
| Single ProcCall {region; _}
|
||||||
|
| Single Null region
|
||||||
|
| Block {region; _} -> region
|
||||||
|
|
||||||
|
let core_pattern_to_region = function
|
||||||
|
PVar {region; _}
|
||||||
|
| PWild region
|
||||||
|
| PInt {region; _}
|
||||||
|
| PBytes {region; _}
|
||||||
|
| PString {region; _}
|
||||||
|
| PUnit region
|
||||||
|
| PFalse region
|
||||||
|
| PTrue region
|
||||||
|
| PNone region
|
||||||
|
| PSome {region; _}
|
||||||
|
| PList Sugar {region; _}
|
||||||
|
| PList Raw {region; _}
|
||||||
|
| PTuple {region; _} -> region
|
||||||
|
|
||||||
|
(* Printing the tokens with their source regions *)
|
||||||
|
|
||||||
|
let printf = Printf.printf
|
||||||
|
|
||||||
|
let compact (region: Region.t) =
|
||||||
|
region#compact ~offsets:EvalOpt.offsets EvalOpt.mode
|
||||||
|
|
||||||
|
let print_nsepseq sep print (head,tail) =
|
||||||
|
let print_aux (sep_reg, item) =
|
||||||
|
printf "%s: %s\n" (compact sep_reg) sep;
|
||||||
|
print item
|
||||||
|
in print head; List.iter print_aux tail
|
||||||
|
|
||||||
|
let print_sepseq sep print = function
|
||||||
|
None -> ()
|
||||||
|
| Some seq -> print_nsepseq sep print seq
|
||||||
|
|
||||||
|
let print_token region lexeme =
|
||||||
|
printf "%s: %s\n"(compact region) lexeme
|
||||||
|
|
||||||
|
let print_var {region; value=lexeme} =
|
||||||
|
printf "%s: Ident \"%s\"\n" (compact region) lexeme
|
||||||
|
|
||||||
|
let print_constr {region; value=lexeme} =
|
||||||
|
printf "%s: Constr \"%s\"\n"
|
||||||
|
(compact region) lexeme
|
||||||
|
|
||||||
|
let print_string {region; value=lexeme} =
|
||||||
|
printf "%s: String \"%s\"\n"
|
||||||
|
(compact region) lexeme
|
||||||
|
|
||||||
|
let print_bytes {region; value = lexeme, abstract} =
|
||||||
|
printf "%s: Bytes (\"%s\", \"0x%s\")\n"
|
||||||
|
(compact region) lexeme
|
||||||
|
(MBytes.to_hex abstract |> Hex.to_string)
|
||||||
|
|
||||||
|
let print_int {region; value = lexeme, abstract} =
|
||||||
|
printf "%s: Int (\"%s\", %s)\n"
|
||||||
|
(compact region) lexeme
|
||||||
|
(Z.to_string abstract)
|
||||||
|
|
||||||
|
let rec print_tokens ast =
|
||||||
|
List.iter print_type_decl ast#types;
|
||||||
|
print_parameter_decl ast#parameter;
|
||||||
|
print_storage_decl ast#storage;
|
||||||
|
print_operations_decl ast#operations;
|
||||||
|
List.iter print_lambda_decl ast#lambdas;
|
||||||
|
print_block ast#block;
|
||||||
|
print_token ast#eof "EOF"
|
||||||
|
|
||||||
|
and print_parameter_decl {value=node; _} =
|
||||||
|
let kwd_parameter, variable, colon, type_expr = node in
|
||||||
|
print_token kwd_parameter "parameter";
|
||||||
|
print_var variable;
|
||||||
|
print_token colon ":";
|
||||||
|
print_type_expr type_expr
|
||||||
|
|
||||||
|
and print_storage_decl {value=node; _} =
|
||||||
|
let kwd_storage, type_expr = node in
|
||||||
|
print_token kwd_storage "storage";
|
||||||
|
print_type_expr type_expr
|
||||||
|
|
||||||
|
and print_operations_decl {value=node; _} =
|
||||||
|
let kwd_operations, type_expr = node in
|
||||||
|
print_token kwd_operations "operations";
|
||||||
|
print_type_expr type_expr
|
||||||
|
|
||||||
|
and print_type_decl {value=node; _} =
|
||||||
|
let kwd_type, type_name, kwd_is, type_expr = node in
|
||||||
|
print_token kwd_type "type";
|
||||||
|
print_var type_name;
|
||||||
|
print_token kwd_is "is";
|
||||||
|
print_type_expr type_expr
|
||||||
|
|
||||||
|
and print_type_expr = function
|
||||||
|
Prod cartesian -> print_cartesian cartesian
|
||||||
|
| Sum sum_type -> print_sum_type sum_type
|
||||||
|
| Record record_type -> print_record_type record_type
|
||||||
|
| TypeApp type_app -> print_type_app type_app
|
||||||
|
| ParType par_type -> print_par_type par_type
|
||||||
|
| TAlias type_alias -> print_var type_alias
|
||||||
|
|
||||||
|
and print_cartesian {value=sequence; _} =
|
||||||
|
print_nsepseq "*" print_type_expr sequence
|
||||||
|
|
||||||
|
and print_variant {value=node; _} =
|
||||||
|
let constr, kwd_of, cartesian = node in
|
||||||
|
print_constr constr;
|
||||||
|
print_token kwd_of "of";
|
||||||
|
print_cartesian cartesian
|
||||||
|
|
||||||
|
and print_sum_type {value=sequence; _} =
|
||||||
|
print_nsepseq "|" print_variant sequence
|
||||||
|
|
||||||
|
and print_record_type {value=node; _} =
|
||||||
|
let kwd_record, field_decls, kwd_end = node in
|
||||||
|
print_token kwd_record "record";
|
||||||
|
print_field_decls field_decls;
|
||||||
|
print_token kwd_end "end"
|
||||||
|
|
||||||
|
and print_type_app {value=node; _} =
|
||||||
|
let type_name, type_tuple = node in
|
||||||
|
print_var type_name;
|
||||||
|
print_type_tuple type_tuple
|
||||||
|
|
||||||
|
and print_par_type {value=node; _} =
|
||||||
|
let lpar, type_expr, rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_type_expr type_expr;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_field_decls sequence =
|
||||||
|
print_nsepseq ";" print_field_decl sequence
|
||||||
|
|
||||||
|
and print_field_decl {value=node; _} =
|
||||||
|
let var, colon, type_expr = node in
|
||||||
|
print_var var;
|
||||||
|
print_token colon ":";
|
||||||
|
print_type_expr type_expr
|
||||||
|
|
||||||
|
and print_type_tuple {value=node; _} =
|
||||||
|
let lpar, sequence, rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_nsepseq "," print_var sequence;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_lambda_decl = function
|
||||||
|
FunDecl fun_decl -> print_fun_decl fun_decl
|
||||||
|
| ProcDecl proc_decl -> print_proc_decl proc_decl
|
||||||
|
|
||||||
|
and print_fun_decl {value=node; _} =
|
||||||
|
print_token node#kwd_function "function";
|
||||||
|
print_var node#var;
|
||||||
|
print_parameters node#param;
|
||||||
|
print_token node#colon ":";
|
||||||
|
print_type_expr node#ret_type;
|
||||||
|
print_token node#kwd_is "is";
|
||||||
|
print_block node#body;
|
||||||
|
print_token node#kwd_with "with";
|
||||||
|
print_expr node#return
|
||||||
|
|
||||||
|
and print_proc_decl {value=node; _} =
|
||||||
|
print_token node#kwd_procedure "procedure";
|
||||||
|
print_var node#var;
|
||||||
|
print_parameters node#param;
|
||||||
|
print_token node#kwd_is "is";
|
||||||
|
print_block node#body
|
||||||
|
|
||||||
|
and print_parameters {value=node; _} =
|
||||||
|
let lpar, sequence, rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_nsepseq ";" print_param_decl sequence;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_param_decl {value=node; _} =
|
||||||
|
let var_kind, variable, colon, type_expr = node in
|
||||||
|
print_var_kind var_kind;
|
||||||
|
print_var variable;
|
||||||
|
print_token colon ":";
|
||||||
|
print_type_expr type_expr
|
||||||
|
|
||||||
|
and print_var_kind = function
|
||||||
|
Mutable kwd_var -> print_token kwd_var "var"
|
||||||
|
| Const kwd_const -> print_token kwd_const "const"
|
||||||
|
|
||||||
|
and print_block {value=node; _} =
|
||||||
|
print_value_decls node#decls;
|
||||||
|
print_token node#opening "begin";
|
||||||
|
print_instructions node#instr;
|
||||||
|
print_token node#close "end"
|
||||||
|
|
||||||
|
and print_value_decls {value=sequence; _} =
|
||||||
|
print_sepseq ";" print_var_decl sequence
|
||||||
|
|
||||||
|
and print_var_decl {value=node; _} =
|
||||||
|
let setter =
|
||||||
|
match node#kind with
|
||||||
|
Mutable _ -> ":="
|
||||||
|
| Const _ -> "=" in
|
||||||
|
print_var_kind node#kind;
|
||||||
|
print_var node#var;
|
||||||
|
print_token node#colon ":";
|
||||||
|
print_type_expr node#vtype;
|
||||||
|
print_token node#setter setter;
|
||||||
|
print_expr node#init
|
||||||
|
|
||||||
|
and print_instructions {value=sequence; _} =
|
||||||
|
print_nsepseq ";" print_instruction sequence
|
||||||
|
|
||||||
|
and print_instruction = function
|
||||||
|
Single instr -> print_single_instr instr
|
||||||
|
| Block block -> print_block block
|
||||||
|
|
||||||
|
and print_single_instr = function
|
||||||
|
Cond {value; _} -> print_conditional value
|
||||||
|
| Match {value; _} -> print_match_instr value
|
||||||
|
| Asgnmnt instr -> print_asgnmnt_instr instr
|
||||||
|
| Loop loop -> print_loop loop
|
||||||
|
| ProcCall fun_call -> print_fun_call fun_call
|
||||||
|
| Null kwd_null -> print_token kwd_null "null"
|
||||||
|
|
||||||
|
and print_conditional node =
|
||||||
|
print_token node#kwd_if "if";
|
||||||
|
print_expr node#test;
|
||||||
|
print_token node#kwd_then "then";
|
||||||
|
print_instruction node#ifso;
|
||||||
|
print_token node#kwd_else "else";
|
||||||
|
print_instruction node#ifnot
|
||||||
|
|
||||||
|
and print_match_instr node =
|
||||||
|
print_token node#kwd_match "match";
|
||||||
|
print_expr node#expr;
|
||||||
|
print_token node#kwd_with "with";
|
||||||
|
print_cases node#cases;
|
||||||
|
print_token node#kwd_end "end"
|
||||||
|
|
||||||
|
and print_cases {value=sequence; _} =
|
||||||
|
print_nsepseq "|" print_case sequence
|
||||||
|
|
||||||
|
and print_case {value=node; _} =
|
||||||
|
let pattern, arrow, instruction = node in
|
||||||
|
print_pattern pattern;
|
||||||
|
print_token arrow "->";
|
||||||
|
print_instruction instruction
|
||||||
|
|
||||||
|
and print_asgnmnt_instr {value=node; _} =
|
||||||
|
let variable, asgnmnt, expr = node in
|
||||||
|
print_var variable;
|
||||||
|
print_token asgnmnt ":=";
|
||||||
|
print_expr expr
|
||||||
|
|
||||||
|
and print_loop = function
|
||||||
|
While while_loop -> print_while_loop while_loop
|
||||||
|
| For for_loop -> print_for_loop for_loop
|
||||||
|
|
||||||
|
and print_while_loop {value=node; _} =
|
||||||
|
let kwd_while, expr, block = node in
|
||||||
|
print_token kwd_while "while";
|
||||||
|
print_expr expr;
|
||||||
|
print_block block
|
||||||
|
|
||||||
|
and print_for_loop = function
|
||||||
|
ForInt for_int -> print_for_int for_int
|
||||||
|
| ForCollect for_collect -> print_for_collect for_collect
|
||||||
|
|
||||||
|
and print_for_int {value=node; _} =
|
||||||
|
print_token node#kwd_for "for";
|
||||||
|
print_asgnmnt_instr node#asgnmnt;
|
||||||
|
print_down node#down;
|
||||||
|
print_token node#kwd_to "to";
|
||||||
|
print_expr node#bound;
|
||||||
|
print_step node#step;
|
||||||
|
print_block node#block
|
||||||
|
|
||||||
|
and print_down = function
|
||||||
|
Some kwd_down -> print_token kwd_down "down"
|
||||||
|
| None -> ()
|
||||||
|
|
||||||
|
and print_step = function
|
||||||
|
Some (kwd_step, expr) ->
|
||||||
|
print_token kwd_step "step";
|
||||||
|
print_expr expr
|
||||||
|
| None -> ()
|
||||||
|
|
||||||
|
and print_for_collect {value=node; _} =
|
||||||
|
print_token node#kwd_for "for";
|
||||||
|
print_var node#var;
|
||||||
|
print_bind_to node#bind_to;
|
||||||
|
print_token node#kwd_in "in";
|
||||||
|
print_expr node#expr;
|
||||||
|
print_block node#block
|
||||||
|
|
||||||
|
and print_bind_to = function
|
||||||
|
Some (arrow, variable) ->
|
||||||
|
print_token arrow "->";
|
||||||
|
print_var variable
|
||||||
|
| None -> ()
|
||||||
|
|
||||||
|
and print_expr = function
|
||||||
|
Or {value = expr1, bool_or, expr2; _} ->
|
||||||
|
print_expr expr1; print_token bool_or "||"; print_expr expr2
|
||||||
|
| And {value = expr1, bool_and, expr2; _} ->
|
||||||
|
print_expr expr1; print_token bool_and "&&"; print_expr expr2
|
||||||
|
| Lt {value = expr1, lt, expr2; _} ->
|
||||||
|
print_expr expr1; print_token lt "<"; print_expr expr2
|
||||||
|
| Leq {value = expr1, leq, expr2; _} ->
|
||||||
|
print_expr expr1; print_token leq "<="; print_expr expr2
|
||||||
|
| Gt {value = expr1, gt, expr2; _} ->
|
||||||
|
print_expr expr1; print_token gt ">"; print_expr expr2
|
||||||
|
| Geq {value = expr1, geq, expr2; _} ->
|
||||||
|
print_expr expr1; print_token geq ">="; print_expr expr2
|
||||||
|
| Equal {value = expr1, equal, expr2; _} ->
|
||||||
|
print_expr expr1; print_token equal "="; print_expr expr2
|
||||||
|
| Neq {value = expr1, neq, expr2; _} ->
|
||||||
|
print_expr expr1; print_token neq "=/="; print_expr expr2
|
||||||
|
| Cat {value = expr1, cat, expr2; _} ->
|
||||||
|
print_expr expr1; print_token cat "^"; print_expr expr2
|
||||||
|
| Cons {value = expr1, cons, expr2; _} ->
|
||||||
|
print_expr expr1; print_token cons "<:"; print_expr expr2
|
||||||
|
| Add {value = expr1, add, expr2; _} ->
|
||||||
|
print_expr expr1; print_token add "+"; print_expr expr2
|
||||||
|
| Sub {value = expr1, sub, expr2; _} ->
|
||||||
|
print_expr expr1; print_token sub "-"; print_expr expr2
|
||||||
|
| Mult {value = expr1, mult, expr2; _} ->
|
||||||
|
print_expr expr1; print_token mult "*"; print_expr expr2
|
||||||
|
| Div {value = expr1, div, expr2; _} ->
|
||||||
|
print_expr expr1; print_token div "/"; print_expr expr2
|
||||||
|
| Mod {value = expr1, kwd_mod, expr2; _} ->
|
||||||
|
print_expr expr1; print_token kwd_mod "mod"; print_expr expr2
|
||||||
|
| Neg {value = minus, expr; _} ->
|
||||||
|
print_token minus "-"; print_expr expr
|
||||||
|
| Not {value = kwd_not, expr; _} ->
|
||||||
|
print_token kwd_not "not"; print_expr expr
|
||||||
|
| Int i -> print_int i
|
||||||
|
| Var v -> print_var v
|
||||||
|
| String s -> print_string s
|
||||||
|
| Bytes b -> print_bytes b
|
||||||
|
| False region -> print_token region "False"
|
||||||
|
| True region -> print_token region "True"
|
||||||
|
| Unit region -> print_token region "Unit"
|
||||||
|
| Tuple tuple -> print_tuple tuple
|
||||||
|
| List list -> print_list list
|
||||||
|
| EmptyList elist -> print_empty_list elist
|
||||||
|
| Set set -> print_set set
|
||||||
|
| EmptySet eset -> print_empty_set eset
|
||||||
|
| NoneExpr nexpr -> print_none_expr nexpr
|
||||||
|
| FunCall fun_call -> print_fun_call fun_call
|
||||||
|
| ConstrApp capp -> print_constr_app capp
|
||||||
|
| SomeApp sapp -> print_some_app sapp
|
||||||
|
| MapLookUp lookup -> print_map_lookup lookup
|
||||||
|
| ParExpr pexpr -> print_par_expr pexpr
|
||||||
|
|
||||||
|
and print_tuple {value=node; _} =
|
||||||
|
let lpar, sequence, rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_nsepseq "," print_expr sequence;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_list {value=node; _} =
|
||||||
|
let lbra, sequence, rbra = node in
|
||||||
|
print_token lbra "[";
|
||||||
|
print_nsepseq "," print_expr sequence;
|
||||||
|
print_token rbra "]"
|
||||||
|
|
||||||
|
and print_empty_list {value=node; _} =
|
||||||
|
let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_token lbracket "[";
|
||||||
|
print_token rbracket "]";
|
||||||
|
print_token colon ":";
|
||||||
|
print_type_expr type_expr;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_set {value=node; _} =
|
||||||
|
let lbrace, sequence, rbrace = node in
|
||||||
|
print_token lbrace "{";
|
||||||
|
print_nsepseq "," print_expr sequence;
|
||||||
|
print_token rbrace "}"
|
||||||
|
|
||||||
|
and print_empty_set {value=node; _} =
|
||||||
|
let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_token lbrace "{";
|
||||||
|
print_token rbrace "}";
|
||||||
|
print_token colon ":";
|
||||||
|
print_type_expr type_expr;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_none_expr {value=node; _} =
|
||||||
|
let lpar, (c_None, colon, type_expr), rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_token c_None "None";
|
||||||
|
print_token colon ":";
|
||||||
|
print_type_expr type_expr;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_fun_call {value=node; _} =
|
||||||
|
let fun_name, arguments = node in
|
||||||
|
print_var fun_name;
|
||||||
|
print_tuple arguments
|
||||||
|
|
||||||
|
and print_constr_app {value=node; _} =
|
||||||
|
let constr, arguments = node in
|
||||||
|
print_constr constr;
|
||||||
|
print_tuple arguments
|
||||||
|
|
||||||
|
and print_some_app {value=node; _} =
|
||||||
|
let c_Some, arguments = node in
|
||||||
|
print_token c_Some "Some";
|
||||||
|
print_tuple arguments
|
||||||
|
|
||||||
|
and print_map_lookup {value=node; _} =
|
||||||
|
let {value = lbracket, expr, rbracket; _} = node#index in
|
||||||
|
print_var node#map_name;
|
||||||
|
print_token node#selector ".";
|
||||||
|
print_token lbracket "[";
|
||||||
|
print_expr expr;
|
||||||
|
print_token rbracket "]"
|
||||||
|
|
||||||
|
and print_par_expr {value=node; _} =
|
||||||
|
let lpar, expr, rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_expr expr;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_pattern {value=sequence; _} =
|
||||||
|
print_nsepseq "<:" print_core_pattern sequence
|
||||||
|
|
||||||
|
and print_core_pattern = function
|
||||||
|
PVar var -> print_var var
|
||||||
|
| PWild wild -> print_token wild "_"
|
||||||
|
| PInt i -> print_int i
|
||||||
|
| PBytes b -> print_bytes b
|
||||||
|
| PString s -> print_string s
|
||||||
|
| PUnit region -> print_token region "Unit"
|
||||||
|
| PFalse region -> print_token region "False"
|
||||||
|
| PTrue region -> print_token region "True"
|
||||||
|
| PNone region -> print_token region "None"
|
||||||
|
| PSome psome -> print_psome psome
|
||||||
|
| PList pattern -> print_list_pattern pattern
|
||||||
|
| PTuple ptuple -> print_ptuple ptuple
|
||||||
|
|
||||||
|
and print_psome {value=node; _} =
|
||||||
|
let c_Some, patterns = node in
|
||||||
|
print_token c_Some "Some";
|
||||||
|
print_patterns patterns
|
||||||
|
|
||||||
|
and print_patterns {value=node; _} =
|
||||||
|
let lpar, core_pattern, rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_core_pattern core_pattern;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_list_pattern = function
|
||||||
|
Sugar sugar -> print_sugar sugar
|
||||||
|
| Raw raw -> print_raw raw
|
||||||
|
|
||||||
|
and print_sugar {value=node; _} =
|
||||||
|
let lbracket, sequence, rbracket = node in
|
||||||
|
print_token lbracket "[";
|
||||||
|
print_sepseq "," print_core_pattern sequence;
|
||||||
|
print_token rbracket "]"
|
||||||
|
|
||||||
|
and print_raw {value=node; _} =
|
||||||
|
let lpar, (core_pattern, cons, pattern), rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_core_pattern core_pattern;
|
||||||
|
print_token cons "<:";
|
||||||
|
print_pattern pattern;
|
||||||
|
print_token rpar ")"
|
||||||
|
|
||||||
|
and print_ptuple {value=node; _} =
|
||||||
|
let lpar, sequence, rpar = node in
|
||||||
|
print_token lpar "(";
|
||||||
|
print_nsepseq "," print_core_pattern sequence;
|
||||||
|
print_token rpar ")"
|
384
AST.mli
Normal file
384
AST.mli
Normal file
@ -0,0 +1,384 @@
|
|||||||
|
(* Abstract Syntax Tree (AST) for Ligo *)
|
||||||
|
|
||||||
|
open Utils
|
||||||
|
|
||||||
|
(* Regions
|
||||||
|
|
||||||
|
The AST carries all the regions where tokens have been found by the
|
||||||
|
lexer, plus additional regions corresponding to whole subtrees
|
||||||
|
(like entire expressions, patterns etc.). These regions are needed
|
||||||
|
for error reporting and source-to-source transformations. To make
|
||||||
|
these pervasive regions more legible, we define singleton types for
|
||||||
|
the symbols, keywords etc. with suggestive names like "kwd_and"
|
||||||
|
denoting the _region_ of the occurrence of the keyword "and".
|
||||||
|
*)
|
||||||
|
|
||||||
|
type 'a reg = 'a Region.reg
|
||||||
|
|
||||||
|
val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t
|
||||||
|
val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t
|
||||||
|
val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t
|
||||||
|
|
||||||
|
(* Keywords of Ligo *)
|
||||||
|
|
||||||
|
type kwd_begin = Region.t
|
||||||
|
type kwd_const = Region.t
|
||||||
|
type kwd_down = Region.t
|
||||||
|
type kwd_if = Region.t
|
||||||
|
type kwd_in = Region.t
|
||||||
|
type kwd_is = Region.t
|
||||||
|
type kwd_for = Region.t
|
||||||
|
type kwd_function = Region.t
|
||||||
|
type kwd_parameter = Region.t
|
||||||
|
type kwd_storage = Region.t
|
||||||
|
type kwd_type = Region.t
|
||||||
|
type kwd_of = Region.t
|
||||||
|
type kwd_operations = Region.t
|
||||||
|
type kwd_var = Region.t
|
||||||
|
type kwd_end = Region.t
|
||||||
|
type kwd_then = Region.t
|
||||||
|
type kwd_else = Region.t
|
||||||
|
type kwd_match = Region.t
|
||||||
|
type kwd_procedure = Region.t
|
||||||
|
type kwd_null = Region.t
|
||||||
|
type kwd_record = Region.t
|
||||||
|
type kwd_step = Region.t
|
||||||
|
type kwd_to = Region.t
|
||||||
|
type kwd_mod = Region.t
|
||||||
|
type kwd_not = Region.t
|
||||||
|
type kwd_while = Region.t
|
||||||
|
type kwd_with = Region.t
|
||||||
|
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
|
type c_False = Region.t
|
||||||
|
type c_None = Region.t
|
||||||
|
type c_Some = Region.t
|
||||||
|
type c_True = Region.t
|
||||||
|
type c_Unit = Region.t
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
type semi = Region.t
|
||||||
|
type comma = Region.t
|
||||||
|
type lpar = Region.t
|
||||||
|
type rpar = Region.t
|
||||||
|
type lbrace = Region.t
|
||||||
|
type rbrace = Region.t
|
||||||
|
type lbracket = Region.t
|
||||||
|
type rbracket = Region.t
|
||||||
|
type cons = Region.t
|
||||||
|
type vbar = Region.t
|
||||||
|
type arrow = Region.t
|
||||||
|
type asgnmnt = Region.t
|
||||||
|
type equal = Region.t
|
||||||
|
type colon = Region.t
|
||||||
|
type bool_or = Region.t
|
||||||
|
type bool_and = Region.t
|
||||||
|
type lt = Region.t
|
||||||
|
type leq = Region.t
|
||||||
|
type gt = Region.t
|
||||||
|
type geq = Region.t
|
||||||
|
type neq = Region.t
|
||||||
|
type plus = Region.t
|
||||||
|
type minus = Region.t
|
||||||
|
type slash = Region.t
|
||||||
|
type times = Region.t
|
||||||
|
type dot = Region.t
|
||||||
|
type wild = Region.t
|
||||||
|
type cat = Region.t
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
|
type eof = Region.t
|
||||||
|
|
||||||
|
(* Literals *)
|
||||||
|
|
||||||
|
type variable = string reg
|
||||||
|
type fun_name = string reg
|
||||||
|
type type_name = string reg
|
||||||
|
type field_name = string reg
|
||||||
|
type map_name = string reg
|
||||||
|
type constr = string reg
|
||||||
|
|
||||||
|
(* Comma-separated non-empty lists *)
|
||||||
|
|
||||||
|
type 'a csv = ('a, comma) nsepseq
|
||||||
|
|
||||||
|
(* Bar-separated non-empty lists *)
|
||||||
|
|
||||||
|
type 'a bsv = ('a, vbar) nsepseq
|
||||||
|
|
||||||
|
(* Parentheses *)
|
||||||
|
|
||||||
|
type 'a par = (lpar * 'a * rpar) reg
|
||||||
|
|
||||||
|
(* Brackets compounds *)
|
||||||
|
|
||||||
|
type 'a brackets = (lbracket * 'a * rbracket) reg
|
||||||
|
|
||||||
|
(* Braced compounds *)
|
||||||
|
|
||||||
|
type 'a braces = (lbrace * 'a * rbrace) reg
|
||||||
|
|
||||||
|
(* The Abstract Syntax Tree *)
|
||||||
|
|
||||||
|
type t = <
|
||||||
|
types : type_decl list;
|
||||||
|
parameter : parameter_decl;
|
||||||
|
storage : storage_decl;
|
||||||
|
operations : operations_decl;
|
||||||
|
lambdas : lambda_decl list;
|
||||||
|
block : block reg;
|
||||||
|
eof : eof
|
||||||
|
>
|
||||||
|
|
||||||
|
and ast = t
|
||||||
|
|
||||||
|
and parameter_decl = (kwd_parameter * variable * colon * type_expr) reg
|
||||||
|
|
||||||
|
and storage_decl = (kwd_storage * type_expr) reg
|
||||||
|
|
||||||
|
and operations_decl = (kwd_operations * type_expr) reg
|
||||||
|
|
||||||
|
(* Type declarations *)
|
||||||
|
|
||||||
|
and type_decl = (kwd_type * type_name * kwd_is * type_expr) reg
|
||||||
|
|
||||||
|
and type_expr =
|
||||||
|
Prod of cartesian
|
||||||
|
| Sum of (variant, vbar) nsepseq reg
|
||||||
|
| Record of record_type
|
||||||
|
| TypeApp of (type_name * type_tuple) reg
|
||||||
|
| ParType of type_expr par
|
||||||
|
| TAlias of variable
|
||||||
|
|
||||||
|
and cartesian = (type_expr, times) nsepseq reg
|
||||||
|
|
||||||
|
and variant = (constr * kwd_of * cartesian) reg
|
||||||
|
|
||||||
|
and record_type = (kwd_record * field_decls * kwd_end) reg
|
||||||
|
|
||||||
|
and field_decls = (field_decl, semi) nsepseq
|
||||||
|
|
||||||
|
and field_decl = (variable * colon * type_expr) reg
|
||||||
|
|
||||||
|
and type_tuple = (type_name, comma) nsepseq par
|
||||||
|
|
||||||
|
(* Function and procedure declarations *)
|
||||||
|
|
||||||
|
and lambda_decl =
|
||||||
|
FunDecl of fun_decl reg
|
||||||
|
| ProcDecl of proc_decl reg
|
||||||
|
|
||||||
|
and fun_decl = <
|
||||||
|
kwd_function : kwd_function;
|
||||||
|
var : variable;
|
||||||
|
param : parameters;
|
||||||
|
colon : colon;
|
||||||
|
ret_type : type_expr;
|
||||||
|
kwd_is : kwd_is;
|
||||||
|
body : block reg;
|
||||||
|
kwd_with : kwd_with;
|
||||||
|
return : expr
|
||||||
|
>
|
||||||
|
|
||||||
|
and proc_decl = <
|
||||||
|
kwd_procedure : kwd_procedure;
|
||||||
|
var : variable;
|
||||||
|
param : parameters;
|
||||||
|
kwd_is : kwd_is;
|
||||||
|
body : block reg
|
||||||
|
>
|
||||||
|
|
||||||
|
and parameters = (param_decl, semi) nsepseq par
|
||||||
|
|
||||||
|
and param_decl = (var_kind * variable * colon * type_expr) reg
|
||||||
|
|
||||||
|
and var_kind =
|
||||||
|
Mutable of kwd_var
|
||||||
|
| Const of kwd_const
|
||||||
|
|
||||||
|
and block = <
|
||||||
|
decls : value_decls;
|
||||||
|
opening : kwd_begin;
|
||||||
|
instr : instructions;
|
||||||
|
close : kwd_end
|
||||||
|
>
|
||||||
|
|
||||||
|
and value_decls = (var_decl reg, semi) sepseq reg
|
||||||
|
|
||||||
|
and var_decl = <
|
||||||
|
kind : var_kind;
|
||||||
|
var : variable;
|
||||||
|
colon : colon;
|
||||||
|
vtype : type_expr;
|
||||||
|
setter : Region.t; (* "=" or ":=" *)
|
||||||
|
init : expr
|
||||||
|
>
|
||||||
|
|
||||||
|
and instructions = (instruction, semi) nsepseq reg
|
||||||
|
|
||||||
|
and instruction =
|
||||||
|
Single of single_instr
|
||||||
|
| Block of block reg
|
||||||
|
|
||||||
|
and single_instr =
|
||||||
|
Cond of conditional reg
|
||||||
|
| Match of match_instr reg
|
||||||
|
| Asgnmnt of asgnmnt_instr
|
||||||
|
| Loop of loop
|
||||||
|
| ProcCall of fun_call
|
||||||
|
| Null of kwd_null
|
||||||
|
|
||||||
|
and conditional = <
|
||||||
|
kwd_if : kwd_if;
|
||||||
|
test : expr;
|
||||||
|
kwd_then : kwd_then;
|
||||||
|
ifso : instruction;
|
||||||
|
kwd_else : kwd_else;
|
||||||
|
ifnot : instruction
|
||||||
|
>
|
||||||
|
|
||||||
|
and match_instr = <
|
||||||
|
kwd_match : kwd_match;
|
||||||
|
expr : expr;
|
||||||
|
kwd_with : kwd_with;
|
||||||
|
cases : cases;
|
||||||
|
kwd_end : kwd_end
|
||||||
|
>
|
||||||
|
|
||||||
|
and cases = (case, vbar) nsepseq reg
|
||||||
|
|
||||||
|
and case = (pattern * arrow * instruction) reg
|
||||||
|
|
||||||
|
and asgnmnt_instr = (variable * asgnmnt * expr) reg
|
||||||
|
|
||||||
|
and loop =
|
||||||
|
While of while_loop
|
||||||
|
| For of for_loop
|
||||||
|
|
||||||
|
and while_loop = (kwd_while * expr * block reg) reg
|
||||||
|
|
||||||
|
and for_loop =
|
||||||
|
ForInt of for_int reg
|
||||||
|
| ForCollect of for_collect reg
|
||||||
|
|
||||||
|
and for_int = <
|
||||||
|
kwd_for : kwd_for;
|
||||||
|
asgnmnt : asgnmnt_instr;
|
||||||
|
down : kwd_down option;
|
||||||
|
kwd_to : kwd_to;
|
||||||
|
bound : expr;
|
||||||
|
step : (kwd_step * expr) option;
|
||||||
|
block : block reg
|
||||||
|
>
|
||||||
|
|
||||||
|
and for_collect = <
|
||||||
|
kwd_for : kwd_for;
|
||||||
|
var : variable;
|
||||||
|
bind_to : (arrow * variable) option;
|
||||||
|
kwd_in : kwd_in;
|
||||||
|
expr : expr;
|
||||||
|
block : block reg
|
||||||
|
>
|
||||||
|
|
||||||
|
(* Expressions *)
|
||||||
|
|
||||||
|
and expr =
|
||||||
|
Or of (expr * bool_or * expr) reg
|
||||||
|
| And of (expr * bool_and * expr) reg
|
||||||
|
| Lt of (expr * lt * expr) reg
|
||||||
|
| Leq of (expr * leq * expr) reg
|
||||||
|
| Gt of (expr * gt * expr) reg
|
||||||
|
| Geq of (expr * geq * expr) reg
|
||||||
|
| Equal of (expr * equal * expr) reg
|
||||||
|
| Neq of (expr * neq * expr) reg
|
||||||
|
| Cat of (expr * cat * expr) reg
|
||||||
|
| Cons of (expr * cons * expr) reg
|
||||||
|
| Add of (expr * plus * expr) reg
|
||||||
|
| Sub of (expr * minus * expr) reg
|
||||||
|
| Mult of (expr * times * expr) reg
|
||||||
|
| Div of (expr * slash * expr) reg
|
||||||
|
| Mod of (expr * kwd_mod * expr) reg
|
||||||
|
| Neg of (minus * expr) reg
|
||||||
|
| Not of (kwd_not * expr) reg
|
||||||
|
| Int of (Lexer.lexeme * Z.t) reg
|
||||||
|
| Var of Lexer.lexeme reg
|
||||||
|
| String of Lexer.lexeme reg
|
||||||
|
| Bytes of (Lexer.lexeme * MBytes.t) reg
|
||||||
|
| False of c_False
|
||||||
|
| True of c_True
|
||||||
|
| Unit of c_Unit
|
||||||
|
| Tuple of tuple
|
||||||
|
| List of (expr, comma) nsepseq brackets
|
||||||
|
| EmptyList of empty_list
|
||||||
|
| Set of (expr, comma) nsepseq braces
|
||||||
|
| EmptySet of empty_set
|
||||||
|
| NoneExpr of none_expr
|
||||||
|
| FunCall of fun_call
|
||||||
|
| ConstrApp of constr_app
|
||||||
|
| SomeApp of (c_Some * arguments) reg
|
||||||
|
| MapLookUp of map_lookup reg
|
||||||
|
| ParExpr of expr par
|
||||||
|
|
||||||
|
and tuple = (expr, comma) nsepseq par
|
||||||
|
|
||||||
|
and empty_list =
|
||||||
|
(lbracket * rbracket * colon * type_expr) par
|
||||||
|
|
||||||
|
and empty_set =
|
||||||
|
(lbrace * rbrace * colon * type_expr) par
|
||||||
|
|
||||||
|
and none_expr =
|
||||||
|
(c_None * colon * type_expr) par
|
||||||
|
|
||||||
|
and fun_call = (fun_name * arguments) reg
|
||||||
|
|
||||||
|
and arguments = tuple
|
||||||
|
|
||||||
|
and constr_app = (constr * arguments) reg
|
||||||
|
|
||||||
|
and map_lookup = <
|
||||||
|
map_name : variable;
|
||||||
|
selector : dot;
|
||||||
|
index : expr brackets
|
||||||
|
>
|
||||||
|
|
||||||
|
(* Patterns *)
|
||||||
|
|
||||||
|
and pattern = (core_pattern, cons) nsepseq reg
|
||||||
|
|
||||||
|
and core_pattern =
|
||||||
|
PVar of Lexer.lexeme reg
|
||||||
|
| PWild of wild
|
||||||
|
| PInt of (Lexer.lexeme * Z.t) reg
|
||||||
|
| PBytes of (Lexer.lexeme * MBytes.t) reg
|
||||||
|
| PString of Lexer.lexeme reg
|
||||||
|
| PUnit of c_Unit
|
||||||
|
| PFalse of c_False
|
||||||
|
| PTrue of c_True
|
||||||
|
| PNone of c_None
|
||||||
|
| PSome of (c_Some * core_pattern par) reg
|
||||||
|
| PList of list_pattern
|
||||||
|
| PTuple of (core_pattern, comma) nsepseq par
|
||||||
|
|
||||||
|
and list_pattern =
|
||||||
|
Sugar of (core_pattern, comma) sepseq brackets
|
||||||
|
| Raw of (core_pattern * cons * pattern) par
|
||||||
|
|
||||||
|
(* Projecting regions *)
|
||||||
|
|
||||||
|
val type_expr_to_region : type_expr -> Region.t
|
||||||
|
|
||||||
|
val expr_to_region : expr -> Region.t
|
||||||
|
|
||||||
|
val var_kind_to_region : var_kind -> Region.t
|
||||||
|
|
||||||
|
val instr_to_region : instruction -> Region.t
|
||||||
|
|
||||||
|
val core_pattern_to_region : core_pattern -> Region.t
|
||||||
|
|
||||||
|
(* Printing *)
|
||||||
|
|
||||||
|
val print_tokens : t -> unit
|
143
EvalOpt.ml
Normal file
143
EvalOpt.ml
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
(* Parsing the command-line option for testing the Ligo lexer and
|
||||||
|
parser *)
|
||||||
|
|
||||||
|
let printf = Printf.printf
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
|
let abort msg =
|
||||||
|
Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1
|
||||||
|
|
||||||
|
(* Help *)
|
||||||
|
|
||||||
|
let help () =
|
||||||
|
let file = Filename.basename Sys.argv.(0) in
|
||||||
|
printf "Usage: %s [<option> ...] [<input>.li | \"-\"]\n" file;
|
||||||
|
print_endline "where <input>.li is the Ligo source file (default: stdin),";
|
||||||
|
print_endline "and each <option> (if any) is one of the following:";
|
||||||
|
print_endline " -c, --copy Print lexemes of tokens and markup (lexer)";
|
||||||
|
print_endline " -t, --tokens Print tokens (lexer)";
|
||||||
|
print_endline " -u, --units Print tokens and markup (lexer)";
|
||||||
|
print_endline " -q, --quiet No output, except errors (default)";
|
||||||
|
print_endline " --columns Columns for source locations";
|
||||||
|
print_endline " --bytes Bytes for source locations";
|
||||||
|
print_endline " -v, --verbose=<stage> cmdline, parser";
|
||||||
|
print_endline " -h, --help This help";
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
(* Specifying the command-line options a la GNU *)
|
||||||
|
|
||||||
|
let copy = ref false
|
||||||
|
and tokens = ref false
|
||||||
|
and units = ref false
|
||||||
|
and quiet = ref false
|
||||||
|
and columns = ref false
|
||||||
|
and bytes = ref false
|
||||||
|
and verbose = ref Utils.String.Set.empty
|
||||||
|
and input = ref None
|
||||||
|
|
||||||
|
let split_at_colon = Str.(split (regexp ":"))
|
||||||
|
|
||||||
|
let add_verbose d =
|
||||||
|
verbose := List.fold_left (Utils.swap Utils.String.Set.add)
|
||||||
|
!verbose
|
||||||
|
(split_at_colon d)
|
||||||
|
|
||||||
|
let specs =
|
||||||
|
let open! Getopt in [
|
||||||
|
'c', "copy", set copy true, None;
|
||||||
|
't', "tokens", set tokens true, None;
|
||||||
|
'u', "units", set units true, None;
|
||||||
|
'q', "quiet", set quiet true, None;
|
||||||
|
noshort, "columns", set columns true, None;
|
||||||
|
noshort, "bytes", set bytes true, None;
|
||||||
|
'v', "verbose", None, Some add_verbose;
|
||||||
|
'h', "help", Some help, None
|
||||||
|
]
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Handler of anonymous arguments *)
|
||||||
|
|
||||||
|
let anonymous arg =
|
||||||
|
match !input with
|
||||||
|
None -> input := Some arg
|
||||||
|
| Some _ -> abort (sprintf "Multiple inputs")
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Parsing the command-line options *)
|
||||||
|
|
||||||
|
try Getopt.parse_cmdline specs anonymous with
|
||||||
|
Getopt.Error msg -> abort msg
|
||||||
|
;;
|
||||||
|
|
||||||
|
(* Checking options and exporting them as non-mutable values *)
|
||||||
|
|
||||||
|
type command = Quiet | Copy | Units | Tokens
|
||||||
|
|
||||||
|
let cmd =
|
||||||
|
match !quiet, !copy, !units, !tokens with
|
||||||
|
false, false, false, false
|
||||||
|
| true, false, false, false -> Quiet
|
||||||
|
| false, true, false, false -> Copy
|
||||||
|
| false, false, true, false -> Units
|
||||||
|
| false, false, false, true -> Tokens
|
||||||
|
| _ -> abort "Choose one of -q, -c, -u, -t."
|
||||||
|
|
||||||
|
let string_of convert = function
|
||||||
|
None -> "None"
|
||||||
|
| Some s -> sprintf "Some %s" (convert s)
|
||||||
|
|
||||||
|
let quote s = sprintf "\"%s\"" s
|
||||||
|
|
||||||
|
let verbose_str =
|
||||||
|
let apply e a =
|
||||||
|
if a <> "" then sprintf "%s, %s" e a else e
|
||||||
|
in Utils.String.Set.fold apply !verbose ""
|
||||||
|
|
||||||
|
let print_opt () =
|
||||||
|
printf "COMMAND LINE\n";
|
||||||
|
printf "copy = %b\n" !copy;
|
||||||
|
printf "tokens = %b\n" !tokens;
|
||||||
|
printf "units = %b\n" !units;
|
||||||
|
printf "quiet = %b\n" !quiet;
|
||||||
|
printf "columns = %b\n" !columns;
|
||||||
|
printf "bytes = %b\n" !bytes;
|
||||||
|
printf "verbose = \"%s\"\n" verbose_str;
|
||||||
|
printf "input = %s\n" (string_of quote !input)
|
||||||
|
;;
|
||||||
|
|
||||||
|
if Utils.String.Set.mem "cmdline" !verbose then print_opt ();;
|
||||||
|
|
||||||
|
let input =
|
||||||
|
match !input with
|
||||||
|
None | Some "-" -> !input
|
||||||
|
| Some file_path ->
|
||||||
|
if Filename.check_suffix file_path ".li"
|
||||||
|
then if Sys.file_exists file_path
|
||||||
|
then Some file_path
|
||||||
|
else abort "Source file not found."
|
||||||
|
else abort "Source file lacks the extension .ti."
|
||||||
|
|
||||||
|
(* Exporting remaining options as non-mutable values *)
|
||||||
|
|
||||||
|
let copy = !copy
|
||||||
|
and tokens = !tokens
|
||||||
|
and units = !units
|
||||||
|
and quiet = !quiet
|
||||||
|
and offsets = not !columns
|
||||||
|
and mode = if !bytes then `Byte else `Point
|
||||||
|
and verbose = !verbose
|
||||||
|
;;
|
||||||
|
|
||||||
|
if Utils.String.Set.mem "cmdline" verbose then
|
||||||
|
begin
|
||||||
|
printf "\nEXPORTED COMMAND LINE\n";
|
||||||
|
printf "copy = %b\n" copy;
|
||||||
|
printf "tokens = %b\n" tokens;
|
||||||
|
printf "units = %b\n" units;
|
||||||
|
printf "quiet = %b\n" quiet;
|
||||||
|
printf "offsets = %b\n" offsets;
|
||||||
|
printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
|
||||||
|
printf "verbose = \"%s\"\n" verbose_str;
|
||||||
|
printf "input = %s\n" (string_of quote input)
|
||||||
|
end
|
||||||
|
;;
|
42
EvalOpt.mli
Normal file
42
EvalOpt.mli
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
(* Parsing the command-line option for testing the Ligo lexer and
|
||||||
|
parser *)
|
||||||
|
|
||||||
|
(* If the value [offsets] is [true], then the user requested that
|
||||||
|
messages about source positions and regions be expressed in terms
|
||||||
|
of horizontal offsets. *)
|
||||||
|
|
||||||
|
val offsets : bool
|
||||||
|
|
||||||
|
(* If the value [mode] is [`Byte], then the unit in which source
|
||||||
|
positions and regions are expressed in messages is the byte. If
|
||||||
|
[`Point], the unit is unicode points. *)
|
||||||
|
|
||||||
|
val mode : [`Byte | `Point]
|
||||||
|
|
||||||
|
(* If the option [verbose] is set to a list of predefined stages of
|
||||||
|
the compiler chain, then more information may be displayed about
|
||||||
|
those stages. *)
|
||||||
|
|
||||||
|
val verbose : Utils.String.Set.t
|
||||||
|
|
||||||
|
(* If the value [input] is [None] or [Some "-"], the input is standard
|
||||||
|
input. If [Some f], then the input is the file whose name (file
|
||||||
|
path) is [f]. *)
|
||||||
|
|
||||||
|
val input : string option
|
||||||
|
|
||||||
|
(* If the value [cmd] is
|
||||||
|
* [Quiet], then no output from the lexer and parser should be
|
||||||
|
expected, safe error messages: this is the default value;
|
||||||
|
* [Copy], then lexemes of tokens and markup will be printed to
|
||||||
|
standard output, with the expectation of a perfect match with
|
||||||
|
the input file;
|
||||||
|
* [Units], then the tokens and markup will be printed to standard
|
||||||
|
output, that is, the abstract representation of the concrete
|
||||||
|
lexical syntax;
|
||||||
|
* [Tokens], then the tokens only will be printed.
|
||||||
|
*)
|
||||||
|
|
||||||
|
type command = Quiet | Copy | Units | Tokens
|
||||||
|
|
||||||
|
val cmd : command
|
19
FQueue.ml
Normal file
19
FQueue.ml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
(* Purely functional queues based on a pair of lists *)
|
||||||
|
|
||||||
|
type 'a t = {rear: 'a list; front: 'a list}
|
||||||
|
|
||||||
|
let empty = {rear=[]; front=[]}
|
||||||
|
|
||||||
|
let enq x q = {q with rear = x::q.rear}
|
||||||
|
|
||||||
|
let rec deq = function
|
||||||
|
{rear=[]; front= []} -> None
|
||||||
|
| {rear; front= []} -> deq {rear=[]; front = List.rev rear}
|
||||||
|
| {rear; front=x::f} -> Some ({rear; front=f}, x)
|
||||||
|
|
||||||
|
let rec peek = function
|
||||||
|
{rear=[]; front= []} -> None
|
||||||
|
| {rear; front= []} -> peek {rear=[]; front = List.rev rear}
|
||||||
|
| {rear=_; front=x::_} as q -> Some (q,x)
|
||||||
|
|
||||||
|
let is_empty q = (q = empty)
|
17
FQueue.mli
Normal file
17
FQueue.mli
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
(* Purely functional queues *)
|
||||||
|
|
||||||
|
type 'a t
|
||||||
|
|
||||||
|
val empty : 'a t
|
||||||
|
val enq : 'a -> 'a t -> 'a t
|
||||||
|
val deq : 'a t -> ('a t * 'a) option
|
||||||
|
|
||||||
|
val is_empty : 'a t -> bool
|
||||||
|
|
||||||
|
(* The call [peek q] is [None] if the queue [q] is empty, and,
|
||||||
|
otherwise, is a pair made of a queue and the next item in it to be
|
||||||
|
dequeued. The returned queue contains the same items as [q], in the
|
||||||
|
same order, but more efficient, in general, to use in further
|
||||||
|
calls. *)
|
||||||
|
|
||||||
|
val peek : 'a t -> ('a t * 'a) option
|
149
LexToken.mli
Normal file
149
LexToken.mli
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
(* This signature defines the lexical tokens for Ligo
|
||||||
|
|
||||||
|
_Tokens_ are the abstract units which are used by the parser to
|
||||||
|
build the abstract syntax tree (AST), in other words, the stream of
|
||||||
|
tokens is the minimal model of the input program, carrying
|
||||||
|
implicitly all its structure in a linear encoding, and nothing
|
||||||
|
else, in particular, comments and whitespace are absent.
|
||||||
|
|
||||||
|
A _lexeme_ is a specific character string (concrete
|
||||||
|
representation) denoting a token (abstract representation). Tokens
|
||||||
|
can be thought of as sets, and lexemes as elements of those sets --
|
||||||
|
there is often an infinite number of lexemes, but a small number of
|
||||||
|
tokens. (Think of identifiers as lexemes and one token.)
|
||||||
|
|
||||||
|
The tokens are qualified here as being "lexical" because the
|
||||||
|
parser generator Menhir expects to define them, in which context
|
||||||
|
they are called "parsing tokens", and they are made to match each
|
||||||
|
other. (This is an idiosyncratic terminology.)
|
||||||
|
|
||||||
|
The type of the lexical tokens is the variant [t], also
|
||||||
|
aliased to [token].
|
||||||
|
*)
|
||||||
|
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
|
(* TOKENS *)
|
||||||
|
|
||||||
|
type t =
|
||||||
|
(* Literals *)
|
||||||
|
|
||||||
|
String of lexeme Region.reg
|
||||||
|
| Bytes of (lexeme * MBytes.t) Region.reg
|
||||||
|
| Int of (lexeme * Z.t) Region.reg
|
||||||
|
| Ident of lexeme Region.reg
|
||||||
|
| Constr of lexeme Region.reg
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
| SEMI of Region.t (* ";" *)
|
||||||
|
| COMMA of Region.t (* "," *)
|
||||||
|
| LPAR of Region.t (* "(" *)
|
||||||
|
| RPAR of Region.t (* ")" *)
|
||||||
|
| LBRACE of Region.t (* "{" *)
|
||||||
|
| RBRACE of Region.t (* "}" *)
|
||||||
|
| LBRACKET of Region.t (* "[" *)
|
||||||
|
| RBRACKET of Region.t (* "]" *)
|
||||||
|
| CONS of Region.t (* "<:" *)
|
||||||
|
| VBAR of Region.t (* "|" *)
|
||||||
|
| ARROW of Region.t (* "->" *)
|
||||||
|
| ASGNMNT of Region.t (* ":=" *)
|
||||||
|
| EQUAL of Region.t (* "=" *)
|
||||||
|
| COLON of Region.t (* ":" *)
|
||||||
|
| OR of Region.t (* "||" *)
|
||||||
|
| AND of Region.t (* "&&" *)
|
||||||
|
| LT of Region.t (* "<" *)
|
||||||
|
| LEQ of Region.t (* "<=" *)
|
||||||
|
| GT of Region.t (* ">" *)
|
||||||
|
| GEQ of Region.t (* ">=" *)
|
||||||
|
| NEQ of Region.t (* "=/=" *)
|
||||||
|
| PLUS of Region.t (* "+" *)
|
||||||
|
| MINUS of Region.t (* "-" *)
|
||||||
|
| SLASH of Region.t (* "/" *)
|
||||||
|
| TIMES of Region.t (* "*" *)
|
||||||
|
| DOT of Region.t (* "." *)
|
||||||
|
| WILD of Region.t (* "_" *)
|
||||||
|
| CAT of Region.t (* "^" *)
|
||||||
|
|
||||||
|
(* Keywords *)
|
||||||
|
|
||||||
|
| Begin of Region.t
|
||||||
|
| Const of Region.t
|
||||||
|
| Down of Region.t
|
||||||
|
| If of Region.t
|
||||||
|
| In of Region.t
|
||||||
|
| Is of Region.t
|
||||||
|
| For of Region.t
|
||||||
|
| Function of Region.t
|
||||||
|
| Parameter of Region.t
|
||||||
|
| Storage of Region.t
|
||||||
|
| Type of Region.t
|
||||||
|
| Of of Region.t
|
||||||
|
| Operations of Region.t
|
||||||
|
| Var of Region.t
|
||||||
|
| End of Region.t
|
||||||
|
| Then of Region.t
|
||||||
|
| Else of Region.t
|
||||||
|
| Match of Region.t
|
||||||
|
| Null of Region.t
|
||||||
|
| Procedure of Region.t
|
||||||
|
| Record of Region.t
|
||||||
|
| Step of Region.t
|
||||||
|
| To of Region.t
|
||||||
|
| Mod of Region.t
|
||||||
|
| Not of Region.t
|
||||||
|
| While of Region.t
|
||||||
|
| With of Region.t
|
||||||
|
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
|
| C_False of Region.t (* "False" *)
|
||||||
|
| C_None of Region.t (* "None" *)
|
||||||
|
| C_Some of Region.t (* "Some" *)
|
||||||
|
| C_True of Region.t (* "True" *)
|
||||||
|
| C_Unit of Region.t (* "Unit" *)
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
|
| EOF of Region.t
|
||||||
|
|
||||||
|
|
||||||
|
type token = t
|
||||||
|
|
||||||
|
(* Projections
|
||||||
|
|
||||||
|
The difference between extracting the lexeme and a string from a
|
||||||
|
token is that the latter is the textual representation of the OCaml
|
||||||
|
value denoting the token (its abstract syntax), rather than its
|
||||||
|
lexeme (concrete syntax).
|
||||||
|
*)
|
||||||
|
|
||||||
|
val to_lexeme : token -> lexeme
|
||||||
|
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||||
|
val to_region : token -> Region.t
|
||||||
|
|
||||||
|
(* Injections *)
|
||||||
|
|
||||||
|
type int_err =
|
||||||
|
Non_canonical_zero
|
||||||
|
|
||||||
|
type ident_err = Reserved_name
|
||||||
|
|
||||||
|
val mk_string : lexeme -> Region.t -> token
|
||||||
|
val mk_bytes : lexeme -> Region.t -> token
|
||||||
|
val mk_int : lexeme -> Region.t -> (token, int_err) result
|
||||||
|
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
|
||||||
|
val mk_constr : lexeme -> Region.t -> token
|
||||||
|
val mk_sym : lexeme -> Region.t -> token
|
||||||
|
val eof : Region.t -> token
|
||||||
|
|
||||||
|
(* Predicates *)
|
||||||
|
|
||||||
|
val is_string : token -> bool
|
||||||
|
val is_bytes : token -> bool
|
||||||
|
val is_int : token -> bool
|
||||||
|
val is_ident : token -> bool
|
||||||
|
val is_kwd : token -> bool
|
||||||
|
val is_constr : token -> bool
|
||||||
|
val is_sym : token -> bool
|
||||||
|
val is_eof : token -> bool
|
616
LexToken.mll
Normal file
616
LexToken.mll
Normal file
@ -0,0 +1,616 @@
|
|||||||
|
(* Lexer specification for Ligo, to be processed by [ocamllex] *)
|
||||||
|
|
||||||
|
{
|
||||||
|
(* START HEADER *)
|
||||||
|
|
||||||
|
(* Shorthands *)
|
||||||
|
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
|
module SMap = Utils.String.Map
|
||||||
|
module SSet = Utils.String.Set
|
||||||
|
|
||||||
|
(* Hack to roll back one lexeme in the current semantic action *)
|
||||||
|
(*
|
||||||
|
let rollback buffer =
|
||||||
|
let open Lexing in
|
||||||
|
let len = String.length (lexeme buffer) in
|
||||||
|
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
||||||
|
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
||||||
|
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* TOKENS *)
|
||||||
|
|
||||||
|
type t =
|
||||||
|
(* Literals *)
|
||||||
|
|
||||||
|
String of lexeme Region.reg
|
||||||
|
| Bytes of (lexeme * MBytes.t) Region.reg
|
||||||
|
| Int of (lexeme * Z.t) Region.reg
|
||||||
|
| Ident of lexeme Region.reg
|
||||||
|
| Constr of lexeme Region.reg
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
| SEMI of Region.t
|
||||||
|
| COMMA of Region.t
|
||||||
|
| LPAR of Region.t
|
||||||
|
| RPAR of Region.t
|
||||||
|
| LBRACE of Region.t
|
||||||
|
| RBRACE of Region.t
|
||||||
|
| LBRACKET of Region.t
|
||||||
|
| RBRACKET of Region.t
|
||||||
|
| CONS of Region.t
|
||||||
|
| VBAR of Region.t
|
||||||
|
| ARROW of Region.t
|
||||||
|
| ASGNMNT of Region.t
|
||||||
|
| EQUAL of Region.t
|
||||||
|
| COLON of Region.t
|
||||||
|
| OR of Region.t
|
||||||
|
| AND of Region.t
|
||||||
|
| LT of Region.t
|
||||||
|
| LEQ of Region.t
|
||||||
|
| GT of Region.t
|
||||||
|
| GEQ of Region.t
|
||||||
|
| NEQ of Region.t
|
||||||
|
| PLUS of Region.t
|
||||||
|
| MINUS of Region.t
|
||||||
|
| SLASH of Region.t
|
||||||
|
| TIMES of Region.t
|
||||||
|
| DOT of Region.t
|
||||||
|
| WILD of Region.t
|
||||||
|
| CAT of Region.t
|
||||||
|
|
||||||
|
(* Keywords *)
|
||||||
|
|
||||||
|
| Begin of Region.t
|
||||||
|
| Const of Region.t
|
||||||
|
| Down of Region.t
|
||||||
|
| If of Region.t
|
||||||
|
| In of Region.t
|
||||||
|
| Is of Region.t
|
||||||
|
| For of Region.t
|
||||||
|
| Function of Region.t
|
||||||
|
| Parameter of Region.t
|
||||||
|
| Storage of Region.t
|
||||||
|
| Type of Region.t
|
||||||
|
| Of of Region.t
|
||||||
|
| Operations of Region.t
|
||||||
|
| Var of Region.t
|
||||||
|
| End of Region.t
|
||||||
|
| Then of Region.t
|
||||||
|
| Else of Region.t
|
||||||
|
| Match of Region.t
|
||||||
|
| Null of Region.t
|
||||||
|
| Procedure of Region.t
|
||||||
|
| Record of Region.t
|
||||||
|
| Step of Region.t
|
||||||
|
| To of Region.t
|
||||||
|
| Mod of Region.t
|
||||||
|
| Not of Region.t
|
||||||
|
| While of Region.t
|
||||||
|
| With of Region.t
|
||||||
|
|
||||||
|
(* Types *)
|
||||||
|
(*
|
||||||
|
| T_address of Region.t (* "address" *)
|
||||||
|
| T_big_map of Region.t (* "big_map" *)
|
||||||
|
| T_bool of Region.t (* "bool" *)
|
||||||
|
| T_bytes of Region.t (* "bytes" *)
|
||||||
|
| T_contract of Region.t (* "contract" *)
|
||||||
|
| T_int of Region.t (* "int" *)
|
||||||
|
| T_key of Region.t (* "key" *)
|
||||||
|
| T_key_hash of Region.t (* "key_hash" *)
|
||||||
|
| T_list of Region.t (* "list" *)
|
||||||
|
| T_map of Region.t (* "map" *)
|
||||||
|
| T_mutez of Region.t (* "mutez" *)
|
||||||
|
| T_nat of Region.t (* "nat" *)
|
||||||
|
| T_operation of Region.t (* "operation" *)
|
||||||
|
| T_option of Region.t (* "option" *)
|
||||||
|
| T_set of Region.t (* "set" *)
|
||||||
|
| T_signature of Region.t (* "signature" *)
|
||||||
|
| T_string of Region.t (* "string" *)
|
||||||
|
| T_timestamp of Region.t (* "timestamp" *)
|
||||||
|
| T_unit of Region.t (* "unit" *)
|
||||||
|
*)
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
|
| C_False of Region.t (* "False" *)
|
||||||
|
| C_None of Region.t (* "None" *)
|
||||||
|
| C_Some of Region.t (* "Some" *)
|
||||||
|
| C_True of Region.t (* "True" *)
|
||||||
|
| C_Unit of Region.t (* "Unit" *)
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
|
| EOF of Region.t
|
||||||
|
|
||||||
|
|
||||||
|
(* Alias of the internal token type [t], as required by the [TOKEN]
   signature exported below. *)
type token = t
|
||||||
|
|
||||||
|
(* [proj_token token] projects [token] to the pair of its source
   region and a string rendering of its constructor with its payload,
   chiefly for tracing and error messages. *)
let proj_token = function
  (* Literals *)
  String Region.{region; value} ->
    region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
    region,
    sprintf "Bytes (\"%s\", \"0x%s\")"
            s (MBytes.to_hex b |> Hex.to_string)
| Int Region.{region; value = s,n} ->
    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
    region, sprintf "Ident \"%s\"" value
| Constr Region.{region; value} ->
    region, sprintf "Constr \"%s\"" value

  (* Symbols *)
| SEMI r -> r, "SEMI"             | COMMA r -> r, "COMMA"
| LPAR r -> r, "LPAR"             | RPAR r -> r, "RPAR"
| LBRACE r -> r, "LBRACE"         | RBRACE r -> r, "RBRACE"
| LBRACKET r -> r, "LBRACKET"     | RBRACKET r -> r, "RBRACKET"
| CONS r -> r, "CONS"             | VBAR r -> r, "VBAR"
| ARROW r -> r, "ARROW"           | ASGNMNT r -> r, "ASGNMNT"
| EQUAL r -> r, "EQUAL"           | COLON r -> r, "COLON"
| OR r -> r, "OR"                 | AND r -> r, "AND"
| LT r -> r, "LT"                 | LEQ r -> r, "LEQ"
| GT r -> r, "GT"                 | GEQ r -> r, "GEQ"
| NEQ r -> r, "NEQ"               | PLUS r -> r, "PLUS"
| MINUS r -> r, "MINUS"           | SLASH r -> r, "SLASH"
| TIMES r -> r, "TIMES"           | DOT r -> r, "DOT"
| WILD r -> r, "WILD"             | CAT r -> r, "CAT"

  (* Keywords *)
| Begin r -> r, "Begin"           | Const r -> r, "Const"
| Down r -> r, "Down"             | If r -> r, "If"
| In r -> r, "In"                 | Is r -> r, "Is"
| For r -> r, "For"               | Function r -> r, "Function"
| Parameter r -> r, "Parameter"   | Storage r -> r, "Storage"
| Type r -> r, "Type"             | Of r -> r, "Of"
| Operations r -> r, "Operations" | Var r -> r, "Var"
| End r -> r, "End"               | Then r -> r, "Then"
| Else r -> r, "Else"             | Match r -> r, "Match"
| Null r -> r, "Null"             | Procedure r -> r, "Procedure"
| Record r -> r, "Record"         | Step r -> r, "Step"
| To r -> r, "To"                 | Mod r -> r, "Mod"
| Not r -> r, "Not"               | While r -> r, "While"
| With r -> r, "With"

  (* Data constructors *)
| C_False r -> r, "C_False"       | C_None r -> r, "C_None"
| C_Some r -> r, "C_Some"         | C_True r -> r, "C_True"
| C_Unit r -> r, "C_Unit"

  (* Virtual tokens *)
| EOF r -> r, "EOF"
|
||||||
|
|
||||||
|
|
||||||
|
let to_lexeme = function
|
||||||
|
(* Literals *)
|
||||||
|
|
||||||
|
String s -> s.Region.value
|
||||||
|
| Bytes b -> fst b.Region.value
|
||||||
|
| Int i -> fst i.Region.value
|
||||||
|
| Ident id
|
||||||
|
| Constr id -> id.Region.value
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
| SEMI _ -> ";"
|
||||||
|
| COMMA _ -> ","
|
||||||
|
| LPAR _ -> "("
|
||||||
|
| RPAR _ -> ")"
|
||||||
|
| LBRACE _ -> "{"
|
||||||
|
| RBRACE _ -> "}"
|
||||||
|
| LBRACKET _ -> "["
|
||||||
|
| RBRACKET _ -> "]"
|
||||||
|
| CONS _ -> "<:"
|
||||||
|
| VBAR _ -> "|"
|
||||||
|
| ARROW _ -> "->"
|
||||||
|
| ASGNMNT _ -> ":="
|
||||||
|
| EQUAL _ -> "="
|
||||||
|
| COLON _ -> ":"
|
||||||
|
| OR _ -> "||"
|
||||||
|
| AND _ -> "&&"
|
||||||
|
| LT _ -> "<"
|
||||||
|
| LEQ _ -> "<="
|
||||||
|
| GT _ -> ">"
|
||||||
|
| GEQ _ -> ">="
|
||||||
|
| NEQ _ -> "=/="
|
||||||
|
| PLUS _ -> "+"
|
||||||
|
| MINUS _ -> "-"
|
||||||
|
| SLASH _ -> "/"
|
||||||
|
| TIMES _ -> "*"
|
||||||
|
| DOT _ -> "."
|
||||||
|
| WILD _ -> "_"
|
||||||
|
| CAT _ -> "^"
|
||||||
|
|
||||||
|
(* Keywords *)
|
||||||
|
|
||||||
|
| Begin _ -> "begin"
|
||||||
|
| Const _ -> "const"
|
||||||
|
| Down _ -> "down"
|
||||||
|
| If _ -> "if"
|
||||||
|
| In _ -> "in"
|
||||||
|
| Is _ -> "is"
|
||||||
|
| For _ -> "for"
|
||||||
|
| Function _ -> "function"
|
||||||
|
| Parameter _ -> "parameter"
|
||||||
|
| Storage _ -> "storage"
|
||||||
|
| Type _ -> "type"
|
||||||
|
| Of _ -> "of"
|
||||||
|
| Operations _ -> "operations"
|
||||||
|
| Var _ -> "var"
|
||||||
|
| End _ -> "end"
|
||||||
|
| Then _ -> "then"
|
||||||
|
| Else _ -> "else"
|
||||||
|
| Match _ -> "match"
|
||||||
|
| Null _ -> "null"
|
||||||
|
| Procedure _ -> "procedure"
|
||||||
|
| Record _ -> "record"
|
||||||
|
| Step _ -> "step"
|
||||||
|
| To _ -> "to"
|
||||||
|
| Mod _ -> "mod"
|
||||||
|
| Not _ -> "not"
|
||||||
|
| While _ -> "while"
|
||||||
|
| With _ -> "with"
|
||||||
|
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
|
| C_False _ -> "False"
|
||||||
|
| C_None _ -> "None"
|
||||||
|
| C_Some _ -> "Some"
|
||||||
|
| C_True _ -> "True"
|
||||||
|
| C_Unit _ -> "Unit"
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
|
| EOF _ -> ""
|
||||||
|
|
||||||
|
|
||||||
|
(* [to_string token ?offsets mode] renders [token] preceded by its
   compact source region, for tracing. *)
let to_string token ?(offsets=true) mode =
  let region, token_str = proj_token token
  in sprintf "%s: %s" (region#compact ~offsets mode) token_str
|
||||||
|
|
||||||
|
(* [to_region token] is the source region of [token]. *)
let to_region token = proj_token token |> fst
|
||||||
|
|
||||||
|
(* LEXIS *)
|
||||||
|
|
||||||
|
(* Token makers for every keyword: applying one to a region yields
   the corresponding keyword token. Used to build [lexicon] below,
   where each maker is indexed by its lexeme. *)
let keywords = [
  (fun region -> Begin region);
  (fun region -> Const region);
  (fun region -> Down region);
  (fun region -> If region);
  (fun region -> In region);
  (fun region -> Is region);
  (fun region -> For region);
  (fun region -> Function region);
  (fun region -> Parameter region);
  (fun region -> Storage region);
  (fun region -> Type region);
  (fun region -> Of region);
  (fun region -> Operations region);
  (fun region -> Var region);
  (fun region -> End region);
  (fun region -> Then region);
  (fun region -> Else region);
  (fun region -> Match region);
  (fun region -> Null region);
  (fun region -> Procedure region);
  (fun region -> Record region);
  (fun region -> Step region);
  (fun region -> To region);
  (fun region -> Mod region);
  (fun region -> Not region);
  (fun region -> While region);
  (fun region -> With region)
]
|
||||||
|
|
||||||
|
(* OCaml keywords that are not keywords of the language: they are
   rejected as identifiers (see [scan_ident]) to keep sources
   portable to OCaml tooling. *)
let reserved =
  let add set word = SSet.add word set in
  List.fold_left add SSet.empty
    ["and"; "as"; "asr"; "assert"; "class"; "constraint"; "do";
     "done"; "downto"; "exception"; "external"; "false"; "fun";
     "functor"; "include"; "inherit"; "initializer"; "land"; "lazy";
     "let"; "lor"; "lsl"; "lsr"; "lxor"; "method"; "module";
     "mutable"; "new"; "nonrec"; "object"; "open"; "or"; "private";
     "rec"; "sig"; "struct"; "true"; "try"; "val"; "virtual"; "when"]
|
||||||
|
|
||||||
|
(* Token makers for the predefined data constructors, indexed by
   their lexeme in [lexicon] below. *)
let constructors = [
  (fun region -> C_False region);
  (fun region -> C_None region);
  (fun region -> C_Some region);
  (fun region -> C_True region);
  (fun region -> C_Unit region)
]
|
||||||
|
|
||||||
|
(* [add map (key, value)] is [map] with [key] bound to [value]. *)
let add map (key, value) = SMap.add key value map
|
||||||
|
|
||||||
|
(* [mk_map mk_key list] builds the map binding [mk_key v] to [v] for
   every [v] in [list]. *)
let mk_map mk_key list =
  List.fold_left (fun map value -> add map (mk_key value, value))
                 SMap.empty list
|
||||||
|
|
||||||
|
(* The lexicon of the language: token makers for keywords and data
   constructors, indexed by their lexeme, plus the set of reserved
   identifiers. *)
type lexis = {
  kwd  : (Region.t -> token) SMap.t;  (* keyword makers *)
  cstr : (Region.t -> token) SMap.t;  (* data-constructor makers *)
  res  : SSet.t                       (* reserved (OCaml) names *)
}
|
||||||
|
|
||||||
|
(* The lexicon, built once at module initialisation. Each token
   maker is applied to a ghost region solely to recover its lexeme
   via [to_lexeme], which then serves as its map key. *)
let lexicon : lexis =
  let build list =
    mk_map (fun mk -> to_lexeme (mk Region.ghost)) list in
  {kwd  = build keywords;
   cstr = build constructors;
   res  = reserved}
|
||||||
|
|
||||||
|
(* Identifiers *)
|
||||||
|
|
||||||
|
(* Error returned when an identifier is one of the reserved names
   (see [reserved] above). *)
type ident_err = Reserved_name
|
||||||
|
|
||||||
|
(* END HEADER *)
|
||||||
|
}
|
||||||
|
|
||||||
|
(* START LEXER DEFINITION *)
|
||||||
|
|
||||||
|
(* Named regular expressions *)
|
||||||
|
|
||||||
|
(* Named regular expressions (ocamllex syntax) *)

let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let digit = ['0'-'9']
(* Identifiers start with a lowercase letter... *)
let ident = small (letter | '_' | digit)*
(* ...and data constructors with an uppercase one. *)
let constr = capital (letter | '_' | digit)*
|
||||||
|
|
||||||
|
(* Rules *)
|
||||||
|
|
||||||
|
(* [scan_ident region lexicon] scans a whole lexeme assumed to be an
   identifier: reserved names are rejected, keywords yield their
   dedicated token, anything else an [Ident] token. *)
rule scan_ident region lexicon = parse
  (ident as value) eof {
    if SSet.mem value lexicon.res
    then Error Reserved_name
    else Ok (match SMap.find_opt value lexicon.kwd with
               Some mk_kwd -> mk_kwd region
             | None -> Ident Region.{region; value}) }
|
||||||
|
|
||||||
|
(* [scan_constr region lexicon] scans a whole lexeme assumed to be a
   data constructor: predefined constructors yield their dedicated
   token, anything else a generic [Constr] token. *)
and scan_constr region lexicon = parse
  (constr as value) eof {
    match SMap.find_opt value lexicon.cstr with
      Some mk_cstr -> mk_cstr region
    | None -> Constr Region.{region; value} }
|
||||||
|
|
||||||
|
(* END LEXER DEFINITION *)
|
||||||
|
|
||||||
|
{
|
||||||
|
(* START TRAILER *)
|
||||||
|
|
||||||
|
(* Smart constructors (injections) *)
|
||||||
|
|
||||||
|
(* [mk_string lexeme region] makes a [String] token from its lexeme. *)
let mk_string lexeme region = String Region.{region; value=lexeme}
|
||||||
|
|
||||||
|
(* [mk_bytes lexeme region] makes a [Bytes] token: underscores (used
   as digit separators) are dropped from the lexeme before decoding
   the hexadecimal digits. *)
let mk_bytes lexeme region =
  let norm = Str.global_replace (Str.regexp "_") "" lexeme in
  let bytes = MBytes.of_hex (Hex.of_string norm)
  in Bytes Region.{region; value = lexeme, bytes}
|
||||||
|
|
||||||
|
(* Error returned when a literal denoting zero is not written
   exactly "0". *)
type int_err = Non_canonical_zero
|
||||||
|
|
||||||
|
(* [mk_int lexeme region] makes an [Int] token. Underscores are
   allowed as digit separators. If the value is zero but the lexeme
   is not exactly "0", [Error Non_canonical_zero] is returned. *)
let mk_int lexeme region =
  let norm = Str.global_replace (Str.regexp "_") "" lexeme in
  let z = Z.of_string norm in
  if Z.equal z Z.zero && lexeme <> "0"
  then Error Non_canonical_zero
  else Ok (Int Region.{region; value = lexeme, z})
|
||||||
|
|
||||||
|
(* [eof region] makes the virtual end-of-file token. *)
let eof region = EOF region
|
||||||
|
|
||||||
|
(* [mk_sym lexeme region] makes the symbol token denoted by [lexeme].
   Precondition: [lexeme] is one of the symbols recognised by the
   lexer; any other argument is a lexer bug, hence the assertion. *)
let mk_sym lexeme region =
  match lexeme with
    ";"   -> SEMI region      | ","  -> COMMA region
  | "("   -> LPAR region      | ")"  -> RPAR region
  | "{"   -> LBRACE region    | "}"  -> RBRACE region
  | "["   -> LBRACKET region  | "]"  -> RBRACKET region
  | "<:"  -> CONS region      | "|"  -> VBAR region
  | "->"  -> ARROW region     | ":=" -> ASGNMNT region
  | "="   -> EQUAL region     | ":"  -> COLON region
  | "||"  -> OR region        | "&&" -> AND region
  | "<"   -> LT region        | "<=" -> LEQ region
  | ">"   -> GT region        | ">=" -> GEQ region
  | "=/=" -> NEQ region       | "+"  -> PLUS region
  | "-"   -> MINUS region     | "/"  -> SLASH region
  | "*"   -> TIMES region     | "."  -> DOT region
  | "_"   -> WILD region      | "^"  -> CAT region
  | _ -> assert false  (* Unknown symbol: inconsistent lexer *)
|
||||||
|
|
||||||
|
(* Identifiers *)
|
||||||
|
|
||||||
|
(* [mk_ident' lexeme region lexicon] makes an identifier or keyword
   token from [lexeme] by running [scan_ident] (above) on it. *)
let mk_ident' lexeme region lexicon =
  Lexing.from_string lexeme |> scan_ident region lexicon

(* [mk_ident] is [mk_ident'] specialised to the global [lexicon]. *)
let mk_ident lexeme region = mk_ident' lexeme region lexicon

(* [mk_constr' lexeme region lexicon] makes a data-constructor token
   from [lexeme] by running [scan_constr] (above) on it. *)
let mk_constr' lexeme region lexicon =
  Lexing.from_string lexeme |> scan_constr region lexicon

(* [mk_constr] is [mk_constr'] specialised to the global [lexicon]. *)
let mk_constr lexeme region = mk_constr' lexeme region lexicon
|
||||||
|
|
||||||
|
(* Predicates *)
|
||||||
|
|
||||||
|
(* Predicates on literal tokens *)

let is_string = function String _ -> true | _ -> false
let is_bytes  = function Bytes _  -> true | _ -> false
let is_int    = function Int _    -> true | _ -> false
let is_ident  = function Ident _  -> true | _ -> false
|
||||||
|
|
||||||
|
(* [is_kwd token] holds iff [token] is a keyword. *)
let is_kwd = function
  Begin _ | Const _ | Down _ | If _ | In _ | Is _ | For _
| Function _ | Parameter _ | Storage _ | Type _ | Of _
| Operations _ | Var _ | End _ | Then _ | Else _ | Match _
| Null _ | Procedure _ | Record _ | Step _ | To _ | Mod _
| Not _ | While _ | With _ -> true
| _ -> false
|
||||||
|
|
||||||
|
(* [is_constr token] holds iff [token] is a data constructor,
   predefined or user-defined. *)
let is_constr = function
  Constr _ | C_False _ | C_None _ | C_Some _ | C_True _
| C_Unit _ -> true
| _ -> false
|
||||||
|
|
||||||
|
(* [is_sym token] holds iff [token] is a symbol. *)
let is_sym = function
  SEMI _ | COMMA _ | LPAR _ | RPAR _ | LBRACE _ | RBRACE _
| LBRACKET _ | RBRACKET _ | CONS _ | VBAR _ | ARROW _ | ASGNMNT _
| EQUAL _ | COLON _ | OR _ | AND _ | LT _ | LEQ _ | GT _ | GEQ _
| NEQ _ | PLUS _ | MINUS _ | SLASH _ | TIMES _ | DOT _ | WILD _
| CAT _ -> true
| _ -> false
|
||||||
|
|
||||||
|
(* [is_eof token] holds iff [token] is the virtual end-of-file token. *)
let is_eof = function EOF _ -> true | _ -> false
|
||||||
|
|
||||||
|
(* END TRAILER *)
|
||||||
|
}
|
(* ===== New file in this commit: Lexer.mli (153 lines) ===== *)
|
|||||||
|
(* Lexer specification for Ligo, to be processed by [ocamllex].
|
||||||
|
|
||||||
|
The underlying design principles are:
|
||||||
|
|
||||||
|
(1) enforce stylistic constraints at a lexical level, in order to
|
||||||
|
early reject potentially misleading or poorly written
|
||||||
|
Ligo contracts;
|
||||||
|
|
||||||
|
(2) provide precise error messages with hint as how to fix the
|
||||||
|
issue, which is achieved by consulting the lexical
|
||||||
|
right-context of lexemes;
|
||||||
|
|
||||||
|
(3) be as independent as possible from the Ligo version, so
|
||||||
|
upgrades have as little impact as possible on this
|
||||||
|
specification: this is achieved by using the most general
|
||||||
|
regular expressions to match the lexing buffer and broadly
|
||||||
|
distinguish the syntactic categories, and then delegating a
|
||||||
|
finer, protocol-dependent, second analysis to an external
|
||||||
|
module making the tokens (hence a functor below);
|
||||||
|
|
||||||
|
(4) support unit testing (lexing of the whole input with debug
|
||||||
|
traces);
|
||||||
|
|
||||||
|
The limitation to the protocol independence lies in the errors that
|
||||||
|
the external module building the tokens (which is
|
||||||
|
protocol-dependent) may have to report. Indeed these errors have to
|
||||||
|
be contextualised by the lexer in terms of input source regions, so
|
||||||
|
useful error messages can be printed, therefore they are part of
|
||||||
|
the signature [TOKEN] that parameterise the functor generated
|
||||||
|
here. For instance, if, in a future release of Ligo, new tokens may
|
||||||
|
be added, and the recognition of their lexemes may entail new
|
||||||
|
errors, the signature [TOKEN] will have to be augmented and the
|
||||||
|
lexer specification changed. However, it is more likely that
|
||||||
|
instructions or types are added, instead of new kinds of tokens.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* A lexeme is the concrete text of a token in the source. *)
type lexeme = string
|
||||||
|
|
||||||
|
(* TOKENS *)
|
||||||
|
|
||||||
|
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||||
|
can be a functor over tokens. This enables to externalise
|
||||||
|
version-dependent constraints in any module whose signature matches
|
||||||
|
[TOKEN]. Generic functions to construct tokens are required.
|
||||||
|
|
||||||
|
Note the predicate [is_eof], which caracterises the virtual token
|
||||||
|
for end-of-file, because it requires special handling. Some of
|
||||||
|
those functions may yield errors, which are defined as values of
|
||||||
|
the type [int_err] etc. These errors can be better understood by
|
||||||
|
reading the ocamllex specification for the lexer ([Lexer.mll]).
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* [TOKEN] abstracts the protocol-dependent token module over which
   the lexer functor is parameterised. *)
module type TOKEN =
  sig
    type token

    (* Errors returned by the fallible injections below *)

    type int_err = Non_canonical_zero
    type ident_err = Reserved_name

    (* Injections: building a token from its lexeme and region *)

    val mk_string : lexeme -> Region.t -> token
    val mk_bytes : lexeme -> Region.t -> token
    val mk_int : lexeme -> Region.t -> (token, int_err) result
    val mk_ident : lexeme -> Region.t -> (token, ident_err) result
    val mk_constr : lexeme -> Region.t -> token
    val mk_sym : lexeme -> Region.t -> token
    val eof : Region.t -> token

    (* Predicates on the syntactic category of a token *)

    val is_string : token -> bool
    val is_bytes : token -> bool
    val is_int : token -> bool
    val is_ident : token -> bool
    val is_kwd : token -> bool
    val is_constr : token -> bool
    val is_sym : token -> bool
    val is_eof : token -> bool

    (* Projections to the lexeme, a rendering and the source region *)

    val to_lexeme : token -> lexeme
    val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
    val to_region : token -> Region.t
  end
|
||||||
|
|
||||||
|
(* The module type for lexers is [S]. It mainly exports the function
|
||||||
|
[open_token_stream], which returns
|
||||||
|
|
||||||
|
* a function [read] that extracts tokens from a lexing buffer,
|
||||||
|
* together with a lexing buffer [buffer] to read from,
|
||||||
|
* a function [close] that closes that buffer,
|
||||||
|
* a function [get_pos] that returns the current position, and
|
||||||
|
* a function [get_last] that returns the region of the last
|
||||||
|
recognised token.
|
||||||
|
|
||||||
|
Note that a module [Token] is exported too, because the signature
|
||||||
|
of the exported functions depend on it.
|
||||||
|
|
||||||
|
The call [read ~log] evaluates in a lexer (a.k.a tokeniser or
|
||||||
|
scanner) whose type is [Lexing.lexbuf -> token], and suitable for a
|
||||||
|
parser generated by Menhir.
|
||||||
|
|
||||||
|
The argument labelled [log] is a logger. It may print a token and
|
||||||
|
its left markup to a given channel, at the caller's discretion.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* [S] is the signature of a generated lexer: see the commentary
   above for the role of [open_token_stream] and its fields. *)
module type S =
  sig
    module Token : TOKEN
    type token = Token.token

    type file_path = string

    (* A logger receives each recognised token and its left markup. *)
    type logger = Markup.t list -> token -> unit

    val output_token :
      ?offsets:bool -> [`Byte | `Point] ->
      EvalOpt.command -> out_channel -> logger

    (* A lexer instance: a token reader over a lexing buffer, with
       accessors for the current position and last token region, and
       a [close] for the underlying channel. *)
    type instance = {
      read : ?log:logger -> Lexing.lexbuf -> token;
      buffer : Lexing.lexbuf;
      get_pos : unit -> Pos.t;
      get_last : unit -> Region.t;
      close : unit -> unit
    }

    val open_token_stream : file_path option -> instance

    (* Error reporting *)

    exception Error of Error.t Region.reg

    val print_error : ?offsets:bool -> [`Byte | `Point] ->
                      Error.t Region.reg -> unit

    (* Standalone tracer *)

    val trace :
      ?offsets:bool -> [`Byte | `Point] ->
      file_path option -> EvalOpt.command -> unit
  end
|
||||||
|
|
||||||
|
(* The functorised interface
|
||||||
|
|
||||||
|
Note that the module parameter [Token] is re-exported as a
|
||||||
|
submodule in [S].
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* [Make (Token)] is the lexer functorised over the token module,
   which is re-exported as submodule [Token] of the result. *)
module Make (Token: TOKEN) : S with module Token = Token
|
(* ===== New file in this commit: Lexer.mll (803 lines) ===== *)
|
|||||||
|
(* Lexer specification for Ligo, to be processed by [ocamllex]. *)
|
||||||
|
|
||||||
|
{
|
||||||
|
(* START HEADER *)
|
||||||
|
|
||||||
|
(* A lexeme is the concrete text of a token in the source. *)
type lexeme = string
|
||||||
|
|
||||||
|
(* STRING PROCESSING *)
|
||||||
|
|
||||||
|
(* The value of [mk_str len p] ("make string") is a string of length
|
||||||
|
[len] containing the [len] characters in the list [p], in reverse
|
||||||
|
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
|
||||||
|
|
||||||
|
(* [mk_str len chars] is the string of length [len] whose characters
   are those of [chars] in reverse order, for instance
   [mk_str 3 ['c';'b';'a'] = "abc"]. Unset positions (when [chars]
   is shorter than [len]) are filled with spaces. *)
let mk_str (len: int) (p: char list) : string =
  let buf = Bytes.make len ' ' in
  let rec fill idx = function
      [] -> ()
    | char :: rest -> Bytes.set buf idx char; fill (idx - 1) rest
  in fill (len - 1) p; Bytes.to_string buf
|
||||||
|
|
||||||
|
(* The call [explode s a] is the list made by pushing the characters
|
||||||
|
in the string [s] on top of [a], in reverse order. For example,
|
||||||
|
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
|
||||||
|
|
||||||
|
(* [explode s acc] pushes the characters of [s] onto [acc] in
   reverse order: [explode "ba" ['c';'d'] = ['a';'b';'c';'d']].
   This version is tail-recursive (the original recursed once per
   character on the call stack), so it cannot overflow the stack on
   very long lexemes. *)
let explode s acc =
  let rec push i acc =
    if i = String.length s then acc else push (i + 1) (s.[i] :: acc)
  in push 0 acc
|
||||||
|
|
||||||
|
(* LEXER ENGINE *)
|
||||||
|
|
||||||
|
(* Resetting file name and line number in the lexing buffer
|
||||||
|
|
||||||
|
The call [reset ~file ~line buffer] modifies in-place the lexing
|
||||||
|
buffer [buffer] so the lexing engine records that the file
|
||||||
|
associated with [buffer] is named [file], and the current line is
|
||||||
|
[line]. This function is useful when lexing a file that has been
|
||||||
|
previously preprocessed by the C preprocessor, in which case the
|
||||||
|
argument [file] is the name of the file that was preprocessed,
|
||||||
|
_not_ the preprocessed file (of which the user is not normally
|
||||||
|
aware). By default, the [line] argument is [1].
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* Resetting the file name and/or line number recorded in a lexing
   buffer. Useful after preprocessing, so positions refer to the
   original file rather than the preprocessed one. *)

let reset_file ~file buffer =
  Lexing.(buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file})

let reset_line line_num buffer =
  Lexing.(buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line_num})

(* The default [line] is 1, per the [Lexing] module convention. *)
let reset ~file ?(line=1) buffer =
  reset_file ~file buffer; reset_line line buffer
|
||||||
|
|
||||||
|
(* Rolling back one lexeme _within the current semantic action_ *)
|
||||||
|
|
||||||
|
(* [rollback buffer] puts the last recognised lexeme back into the
   lexing buffer, so the next rule will match it again. Only
   meaningful within the current semantic action. *)
let rollback buffer =
  let open Lexing in
  let len = String.length (lexeme buffer) in
  buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
  buffer.lex_curr_p <-
    {buffer.lex_curr_p with
       pos_cnum = buffer.lex_curr_p.pos_cnum - len}
|
||||||
|
|
||||||
|
(* ALIASES *)
|
||||||
|
|
||||||
|
(* Alias for brevity. *)
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
|
(* TOKENS *)
|
||||||
|
|
||||||
|
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||||
|
can be a functor over tokens. Consequently, generic functions to
|
||||||
|
construct tokens are provided. Note predicate [is_eof], which
|
||||||
|
caracterises the virtual token for end-of-file, because it requires
|
||||||
|
special handling. *)
|
||||||
|
|
||||||
|
(* [TOKEN] abstracts the protocol-dependent token module; it mirrors
   the declaration in [Lexer.mli]. *)
module type TOKEN =
  sig
    type token

    (* Errors returned by the fallible injections below *)

    type int_err = Non_canonical_zero
    type ident_err = Reserved_name

    (* Injections: building a token from its lexeme and region *)

    val mk_string : lexeme -> Region.t -> token
    val mk_bytes : lexeme -> Region.t -> token
    val mk_int : lexeme -> Region.t -> (token, int_err) result
    val mk_ident : lexeme -> Region.t -> (token, ident_err) result
    val mk_constr : lexeme -> Region.t -> token
    val mk_sym : lexeme -> Region.t -> token
    val eof : Region.t -> token

    (* Predicates on the syntactic category of a token *)

    val is_string : token -> bool
    val is_bytes : token -> bool
    val is_int : token -> bool
    val is_ident : token -> bool
    val is_kwd : token -> bool
    val is_constr : token -> bool
    val is_sym : token -> bool
    val is_eof : token -> bool

    (* Projections to the lexeme, a rendering and the source region *)

    val to_lexeme : token -> lexeme
    val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
    val to_region : token -> Region.t
  end
|
||||||
|
|
||||||
|
(* The module type for lexers is [S]. *)
|
||||||
|
|
||||||
|
(* [S] is the signature of a generated lexer; it mirrors the
   declaration in [Lexer.mli]. *)
module type S = sig
  module Token : TOKEN
  type token = Token.token

  type file_path = string

  (* A logger receives each recognised token and its left markup. *)
  type logger = Markup.t list -> token -> unit

  val output_token :
    ?offsets:bool -> [`Byte | `Point] ->
    EvalOpt.command -> out_channel -> logger

  (* A lexer instance: a token reader over a lexing buffer, with
     accessors for the current position and last token region, and a
     [close] for the underlying channel. *)
  type instance = {
    read : ?log:logger -> Lexing.lexbuf -> token;
    buffer : Lexing.lexbuf;
    get_pos : unit -> Pos.t;
    get_last : unit -> Region.t;
    close : unit -> unit
  }

  val open_token_stream : file_path option -> instance

  (* Error reporting *)

  exception Error of Error.t Region.reg

  val print_error :
    ?offsets:bool -> [`Byte | `Point] -> Error.t Region.reg -> unit

  (* Standalone tracer *)

  val trace :
    ?offsets:bool -> [`Byte | `Point] ->
    file_path option -> EvalOpt.command -> unit
end
|
||||||
|
|
||||||
|
(* The functorised interface
|
||||||
|
|
||||||
|
Note that the module parameter [Token] is re-exported as a
|
||||||
|
submodule in [S].
|
||||||
|
*)
|
||||||
|
|
||||||
|
module Make (Token: TOKEN) : (S with module Token = Token) =
|
||||||
|
struct
|
||||||
|
module Token = Token
|
||||||
|
type token = Token.token
|
||||||
|
|
||||||
|
type file_path = string
|
||||||
|
|
||||||
|
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
|
||||||
|
|
||||||
|
(* When scanning structured constructs, like strings and comments,
|
||||||
|
we need to keep the region of the opening symbol (like double
|
||||||
|
quote, "#" or "/*") in order to report any error more
|
||||||
|
precisely. Since ocamllex is byte-oriented, we need to store
|
||||||
|
the parsed bytes are characters in an accumulator [acc] and
|
||||||
|
also its length [len], so, we are done, it is easy to build the
|
||||||
|
string making up the structured construct with [mk_str] (see
|
||||||
|
above).
|
||||||
|
|
||||||
|
The resulting data structure is called a _thread_.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* A thread accumulates the characters of a structured construct
   (string or comment): [opening] is the region of its opening
   symbol (for precise errors), [acc] the characters scanned so far
   in reverse order, and [len] their number (see [mk_str]). *)
type thread = {
  opening : Region.t;
  len     : int;
  acc     : char list
}

(* [push_char char thread] is [thread] extended with [char]. *)
let push_char char {opening; len; acc} = {opening; len=len+1; acc=char::acc}

(* [push_string str thread] is [thread] extended with the characters
   of [str], kept in reverse order by [explode]. *)
let push_string str {opening; len; acc} =
  {opening;
   len = len + String.length str;
   acc = explode str acc}
|
||||||
|
|
||||||
|
(* STATE *)
|
||||||
|
|
||||||
|
(* Beyond tokens, the result of lexing is a state (a so-called
|
||||||
|
_state monad_). The type [state] represents the logical state
|
||||||
|
of the lexing engine, that is, a value which is threaded during
|
||||||
|
scanning and which denotes useful, high-level information
|
||||||
|
beyond what the type [Lexing.lexbuf] in the standard library
|
||||||
|
already provides for all generic lexers.
|
||||||
|
|
||||||
|
Tokens are the smallest units used by the parser to build the
|
||||||
|
abstract syntax tree. The state includes a queue of recognised
|
||||||
|
tokens, with the markup at the left of its lexeme until either
|
||||||
|
the start of the file or the end of the previously recognised
|
||||||
|
token.
|
||||||
|
|
||||||
|
The markup from the last recognised token or, if the first
|
||||||
|
token has not been recognised yet, from the beginning of the
|
||||||
|
file is stored in the field [markup] of the state because it is
|
||||||
|
a side-effect, with respect to the output token list, and we
|
||||||
|
use a record with a single field [units] because that record
|
||||||
|
may be easily extended during the future maintenance of this
|
||||||
|
lexer.
|
||||||
|
|
||||||
|
The state also includes a field [pos] which holds the current
|
||||||
|
position in the Ligo source file. The position is not always
|
||||||
|
updated after a single character has been matched: that depends
|
||||||
|
on the regular expression that matched the lexing buffer.
|
||||||
|
|
||||||
|
The fields [decoder] and [supply] offer the support needed
|
||||||
|
for the lexing of UTF-8 encoded characters in comments (the
|
||||||
|
only place where they are allowed in Ligo). The former is the
|
||||||
|
decoder proper and the latter is the effectful function
|
||||||
|
[supply] that takes a byte, a start index and a length and feed
|
||||||
|
it to [decoder]. See the documentation of the third-party
|
||||||
|
library Uutf.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* The logical state threaded through the scanner: the queue of
   units recognised so far (each token paired with the markup on its
   left), the markup accumulated since the last token, the region of
   the last recognised token, the current source position, and the
   UTF-8 decoder with its feeding function (used for comments; see
   the commentary above and the Uutf documentation). *)
type state = {
  units   : (Markup.t list * token) FQueue.t;
  markup  : Markup.t list;
  last    : Region.t;
  pos     : Pos.t;
  decoder : Uutf.decoder;
  supply  : Bytes.t -> int -> int -> unit
}

(* [enqueue (token, state)] appends [token], paired with the pending
   markup, to the unit queue and resets the markup to empty. *)
let enqueue (token, state) = {
  state with
    units = FQueue.enq (state.markup, token) state.units;
    markup = []
}
|
||||||
|
|
||||||
|
(* The call [sync state buffer] updates the current position in
|
||||||
|
accordance with the contents of the lexing buffer, more
|
||||||
|
precisely, depending on the length of the string which has just
|
||||||
|
been recognised by the scanner: that length is used as a
|
||||||
|
positive offset to the current column. *)
|
||||||
|
|
||||||
|
(* [sync state buffer] advances the current position by the length
   (in bytes) of the lexeme just matched, and returns the lexeme's
   region, the lexeme itself and the updated state. *)
let sync state buffer =
  let lex = Lexing.lexeme buffer in
  let len = String.length lex in
  let start = state.pos in
  let stop = start#shift_bytes len in
  let state = {state with pos = stop}
  in Region.make ~start ~stop, lex, state
|
||||||
|
|
||||||
|
(* MARKUP *)
|
||||||
|
|
||||||
|
(* Committing markup to the current logical state *)
|
||||||
|
|
||||||
|
let push_newline state buffer =
|
||||||
|
let value = Lexing.lexeme buffer
|
||||||
|
and () = Lexing.new_line buffer
|
||||||
|
and start = state.pos in
|
||||||
|
let stop = start#new_line value in
|
||||||
|
let state = {state with pos = stop}
|
||||||
|
and region = Region.make ~start ~stop in
|
||||||
|
let unit = Markup.Newline Region.{region; value} in
|
||||||
|
let markup = unit :: state.markup
|
||||||
|
in {state with markup}
|
||||||
|
|
||||||
|
let push_line (thread, state) =
|
||||||
|
let start = thread.opening#start in
|
||||||
|
let region = Region.make ~start ~stop:state.pos
|
||||||
|
and value = mk_str thread.len thread.acc in
|
||||||
|
let unit = Markup.LineCom Region.{region; value} in
|
||||||
|
let markup = unit :: state.markup
|
||||||
|
in {state with markup}
|
||||||
|
|
||||||
|
let push_block (thread, state) =
|
||||||
|
let start = thread.opening#start in
|
||||||
|
let region = Region.make ~start ~stop:state.pos
|
||||||
|
and value = mk_str thread.len thread.acc in
|
||||||
|
let unit = Markup.BlockCom Region.{region; value} in
|
||||||
|
let markup = unit :: state.markup
|
||||||
|
in {state with markup}
|
||||||
|
|
||||||
|
let push_space state buffer =
|
||||||
|
let region, lex, state = sync state buffer in
|
||||||
|
let value = String.length lex in
|
||||||
|
let unit = Markup.Space Region.{region; value} in
|
||||||
|
let markup = unit :: state.markup
|
||||||
|
in {state with markup}
|
||||||
|
|
||||||
|
let push_tabs state buffer =
|
||||||
|
let region, lex, state = sync state buffer in
|
||||||
|
let value = String.length lex in
|
||||||
|
let unit = Markup.Tabs Region.{region; value} in
|
||||||
|
let markup = unit :: state.markup
|
||||||
|
in {state with markup}
|
||||||
|
|
||||||
|
let push_bom state buffer =
|
||||||
|
let region, value, state = sync state buffer in
|
||||||
|
let unit = Markup.BOM Region.{region; value} in
|
||||||
|
let markup = unit :: state.markup
|
||||||
|
in {state with markup}
|
||||||
|
|
||||||
|
(* ERRORS *)
|
||||||
|
|
||||||
|
type Error.t += Invalid_utf8_sequence
|
||||||
|
type Error.t += Unexpected_character of char
|
||||||
|
type Error.t += Undefined_escape_sequence
|
||||||
|
type Error.t += Missing_break
|
||||||
|
type Error.t += Unterminated_string
|
||||||
|
type Error.t += Unterminated_integer
|
||||||
|
type Error.t += Odd_lengthed_bytes
|
||||||
|
type Error.t += Unterminated_comment
|
||||||
|
type Error.t += Orphan_minus
|
||||||
|
type Error.t += Non_canonical_zero
|
||||||
|
type Error.t += Negative_byte_sequence
|
||||||
|
type Error.t += Broken_string
|
||||||
|
type Error.t += Invalid_character_in_string
|
||||||
|
type Error.t += Reserved_name
|
||||||
|
|
||||||
|
let error_to_string = function
|
||||||
|
Invalid_utf8_sequence ->
|
||||||
|
"Invalid UTF-8 sequence.\n"
|
||||||
|
| Unexpected_character c ->
|
||||||
|
sprintf "Unexpected character '%c'.\n" c
|
||||||
|
| Undefined_escape_sequence ->
|
||||||
|
"Undefined escape sequence.\n\
|
||||||
|
Hint: Remove or replace the sequence.\n"
|
||||||
|
| Missing_break ->
|
||||||
|
"Missing break.\n\
|
||||||
|
Hint: Insert some space.\n"
|
||||||
|
| Unterminated_string ->
|
||||||
|
"Unterminated string.\n\
|
||||||
|
Hint: Close with double quotes.\n"
|
||||||
|
| Unterminated_integer ->
|
||||||
|
"Unterminated integer.\n\
|
||||||
|
Hint: Remove the sign or proceed with a natural number.\n"
|
||||||
|
| Odd_lengthed_bytes ->
|
||||||
|
"The length of the byte sequence is an odd number.\n\
|
||||||
|
Hint: Add or remove a digit.\n"
|
||||||
|
| Unterminated_comment ->
|
||||||
|
"Unterminated comment.\n\
|
||||||
|
Hint: Close with \"*/\".\n"
|
||||||
|
| Orphan_minus ->
|
||||||
|
"Orphan minus sign.\n\
|
||||||
|
Hint: Remove the trailing space.\n"
|
||||||
|
| Non_canonical_zero ->
|
||||||
|
"Non-canonical zero.\n\
|
||||||
|
Hint: Use 0.\n"
|
||||||
|
| Negative_byte_sequence ->
|
||||||
|
"Negative byte sequence.\n\
|
||||||
|
Hint: Remove the leading minus sign.\n"
|
||||||
|
| Broken_string ->
|
||||||
|
"The string starting here is interrupted by a line break.\n\
|
||||||
|
Hint: Remove the break or close the string before.\n"
|
||||||
|
| Invalid_character_in_string ->
|
||||||
|
"Invalid character in string.\n\
|
||||||
|
Hint: Remove or replace the character.\n"
|
||||||
|
| Reserved_name ->
|
||||||
|
"Reserved named.\n\
|
||||||
|
Hint: Change the name.\n"
|
||||||
|
| _ -> assert false
|
||||||
|
|
||||||
|
exception Error of Error.t Region.reg
|
||||||
|
|
||||||
|
let fail region value = raise (Error Region.{region; value})
|
||||||
|
|
||||||
|
(* TOKENS *)
|
||||||
|
|
||||||
|
(* Making tokens *)
|
||||||
|
|
||||||
|
let mk_string (thread, state) =
|
||||||
|
let start = thread.opening#start in
|
||||||
|
let stop = state.pos in
|
||||||
|
let region = Region.make ~start ~stop in
|
||||||
|
let lexeme = mk_str thread.len thread.acc in
|
||||||
|
let token = Token.mk_string lexeme region
|
||||||
|
in token, state
|
||||||
|
|
||||||
|
let mk_bytes bytes state buffer =
|
||||||
|
let region, _, state = sync state buffer in
|
||||||
|
let token = Token.mk_bytes bytes region
|
||||||
|
in token, state
|
||||||
|
|
||||||
|
let mk_int state buffer =
|
||||||
|
let region, lexeme, state = sync state buffer in
|
||||||
|
match Token.mk_int lexeme region with
|
||||||
|
Ok token -> token, state
|
||||||
|
| Error Token.Non_canonical_zero ->
|
||||||
|
fail region Non_canonical_zero
|
||||||
|
|
||||||
|
let mk_ident state buffer =
|
||||||
|
let region, lexeme, state = sync state buffer in
|
||||||
|
match Token.mk_ident lexeme region with
|
||||||
|
Ok token -> token, state
|
||||||
|
| Error Token.Reserved_name -> fail region Reserved_name
|
||||||
|
|
||||||
|
let mk_constr state buffer =
|
||||||
|
let region, lexeme, state = sync state buffer
|
||||||
|
in Token.mk_constr lexeme region, state
|
||||||
|
|
||||||
|
let mk_sym state buffer =
|
||||||
|
let region, lexeme, state = sync state buffer
|
||||||
|
in Token.mk_sym lexeme region, state
|
||||||
|
|
||||||
|
let mk_eof state buffer =
|
||||||
|
let region, _, state = sync state buffer
|
||||||
|
in Token.eof region, state
|
||||||
|
|
||||||
|
(* END HEADER *)
|
||||||
|
}
|
||||||
|
|
||||||
|
(* START LEXER DEFINITION *)
|
||||||
|
|
||||||
|
(* Named regular expressions *)
|
||||||
|
|
||||||
|
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
|
||||||
|
let nl = ['\n' '\r'] | "\r\n"
|
||||||
|
let digit = ['0'-'9']
|
||||||
|
let natural = digit | digit (digit | '_')* digit
|
||||||
|
let integer = '-'? natural
|
||||||
|
let small = ['a'-'z']
|
||||||
|
let capital = ['A'-'Z']
|
||||||
|
let letter = small | capital
|
||||||
|
let ident = small (letter | '_' | digit)*
|
||||||
|
let constr = capital (letter | '_' | digit)*
|
||||||
|
let hexa_digit = digit | ['A'-'F']
|
||||||
|
let byte = hexa_digit hexa_digit
|
||||||
|
let byte_seq = byte | byte (byte | '_')* byte
|
||||||
|
let bytes = "0x" (byte_seq? as seq)
|
||||||
|
let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
|
||||||
|
| "\\r" | "\\t" | "\\x" byte
|
||||||
|
let symbol = ';' | ','
|
||||||
|
| '(' | ')' | '{' | '}' | '[' | ']'
|
||||||
|
| "<:" | '|' | "->" | ":=" | '=' | ':'
|
||||||
|
| "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/="
|
||||||
|
| '+' | '-' | '*' | '.' | '_' | '^'
|
||||||
|
|
||||||
|
(* RULES *)
|
||||||
|
|
||||||
|
(* Except for the first rule [init], all rules bear a name starting
|
||||||
|
with "scan".
|
||||||
|
|
||||||
|
All have a parameter [state] that they thread through their
|
||||||
|
recursive calls. The rules for the structured constructs (strings
|
||||||
|
and comments) have an extra parameter of type [thread] (see above).
|
||||||
|
*)
|
||||||
|
|
||||||
|
rule init state = parse
|
||||||
|
utf8_bom { scan (push_bom state lexbuf) lexbuf }
|
||||||
|
| _ { rollback lexbuf; scan state lexbuf }
|
||||||
|
|
||||||
|
and scan state = parse
|
||||||
|
nl { scan (push_newline state lexbuf) lexbuf }
|
||||||
|
| ' '+ { scan (push_space state lexbuf) lexbuf }
|
||||||
|
| '\t'+ { scan (push_tabs state lexbuf) lexbuf }
|
||||||
|
|
||||||
|
| ident { mk_ident state lexbuf |> enqueue }
|
||||||
|
| constr { mk_constr state lexbuf |> enqueue }
|
||||||
|
| bytes { (mk_bytes seq) state lexbuf |> enqueue }
|
||||||
|
| integer { mk_int state lexbuf |> enqueue }
|
||||||
|
| symbol { mk_sym state lexbuf |> enqueue }
|
||||||
|
| eof { mk_eof state lexbuf |> enqueue }
|
||||||
|
|
||||||
|
| '"' { let opening, _, state = sync state lexbuf in
|
||||||
|
let thread = {opening; len=1; acc=['"']} in
|
||||||
|
scan_string thread state lexbuf |> mk_string |> enqueue }
|
||||||
|
|
||||||
|
| "/*" { let opening, _, state = sync state lexbuf in
|
||||||
|
let thread = {opening; len=2; acc=['*';'/']} in
|
||||||
|
let state = scan_block thread state lexbuf |> push_block
|
||||||
|
in scan state lexbuf }
|
||||||
|
|
||||||
|
| '#' { let opening, _, state = sync state lexbuf in
|
||||||
|
let thread = {opening; len=1; acc=['#']} in
|
||||||
|
let state = scan_line thread state lexbuf |> push_line
|
||||||
|
in scan state lexbuf }
|
||||||
|
|
||||||
|
(* Some special errors
|
||||||
|
|
||||||
|
Some special errors are recognised in the semantic actions of the
|
||||||
|
following regular expressions. The first error is a minus sign
|
||||||
|
separated from the integer it modifies by some markup (space or
|
||||||
|
tabs). The second is a minus sign immediately followed by
|
||||||
|
anything else than a natural number (matched above) or markup and
|
||||||
|
a number (previous error). The third is the strange occurrence of
|
||||||
|
an attempt at defining a negative byte sequence. Finally, the
|
||||||
|
catch-all rule reports unexpected characters in the buffer (and
|
||||||
|
is not so special, after all).
|
||||||
|
*)
|
||||||
|
|
||||||
|
| '-' { let region, _, state = sync state lexbuf in
|
||||||
|
let state = scan state lexbuf in
|
||||||
|
let open Markup in
|
||||||
|
match FQueue.peek state.units with
|
||||||
|
None -> assert false
|
||||||
|
| Some (_, ((Space _ | Tabs _)::_, token))
|
||||||
|
when Token.is_int token ->
|
||||||
|
fail region Orphan_minus
|
||||||
|
| _ -> fail region Unterminated_integer }
|
||||||
|
|
||||||
|
| '-' "0x" byte_seq?
|
||||||
|
{ let region, _, _ = sync state lexbuf
|
||||||
|
in fail region Negative_byte_sequence }
|
||||||
|
|
||||||
|
| _ as c { let region, _, _ = sync state lexbuf
|
||||||
|
in fail region (Unexpected_character c) }
|
||||||
|
|
||||||
|
(* Finishing a string *)
|
||||||
|
|
||||||
|
and scan_string thread state = parse
|
||||||
|
nl { fail thread.opening Broken_string }
|
||||||
|
| eof { fail thread.opening Unterminated_string }
|
||||||
|
| ['\t' '\r' '\b']
|
||||||
|
{ let region, _, _ = sync state lexbuf
|
||||||
|
in fail region Invalid_character_in_string }
|
||||||
|
| '"' { let _, _, state = sync state lexbuf
|
||||||
|
in push_char '"' thread, state }
|
||||||
|
| esc { let _, lexeme, state = sync state lexbuf
|
||||||
|
in scan_string (push_string lexeme thread) state lexbuf }
|
||||||
|
| '\\' _ { let region, _, _ = sync state lexbuf
|
||||||
|
in fail region Undefined_escape_sequence }
|
||||||
|
| _ as c { let _, _, state = sync state lexbuf in
|
||||||
|
scan_string (push_char c thread) state lexbuf }
|
||||||
|
|
||||||
|
(* Finishing a block comment
|
||||||
|
|
||||||
|
The lexing of block comments must take care of embedded block
|
||||||
|
comments that may occur within, as well as strings, so no substring
|
||||||
|
"*/" may inadvertantly close the block. This is the purpose of the
|
||||||
|
first case of the scanner [scan_block].
|
||||||
|
*)
|
||||||
|
|
||||||
|
and scan_block thread state = parse
|
||||||
|
'"' | "/*" { let opening = thread.opening in
|
||||||
|
let opening', lexeme, state = sync state lexbuf in
|
||||||
|
let thread = push_string lexeme thread in
|
||||||
|
let thread = {thread with opening=opening'} in
|
||||||
|
let next = if lexeme = "\"" then scan_string
|
||||||
|
else scan_block in
|
||||||
|
let thread, state = next thread state lexbuf in
|
||||||
|
let thread = {thread with opening}
|
||||||
|
in scan_block thread state lexbuf }
|
||||||
|
| "*/" { let _, lexeme, state = sync state lexbuf
|
||||||
|
in push_string lexeme thread, state }
|
||||||
|
| nl as nl { let () = Lexing.new_line lexbuf
|
||||||
|
and state = {state with pos = state.pos#new_line nl}
|
||||||
|
and thread = push_string nl thread
|
||||||
|
in scan_block thread state lexbuf }
|
||||||
|
| eof { fail thread.opening Unterminated_comment }
|
||||||
|
| _ { let () = rollback lexbuf in
|
||||||
|
let len = thread.len in
|
||||||
|
let thread,
|
||||||
|
status = scan_utf8 thread state lexbuf in
|
||||||
|
let delta = thread.len - len in
|
||||||
|
let pos = state.pos#shift_one_uchar delta in
|
||||||
|
match status with
|
||||||
|
None -> scan_block thread {state with pos} lexbuf
|
||||||
|
| Some error ->
|
||||||
|
let region = Region.make ~start:state.pos ~stop:pos
|
||||||
|
in fail region error }
|
||||||
|
|
||||||
|
(* Finishing a line comment *)
|
||||||
|
|
||||||
|
and scan_line thread state = parse
|
||||||
|
nl as nl { let () = Lexing.new_line lexbuf
|
||||||
|
and thread = push_string nl thread
|
||||||
|
and state = {state with pos = state.pos#new_line nl}
|
||||||
|
in thread, state }
|
||||||
|
| eof { fail thread.opening Unterminated_comment }
|
||||||
|
| _ { let () = rollback lexbuf in
|
||||||
|
let len = thread.len in
|
||||||
|
let thread,
|
||||||
|
status = scan_utf8 thread state lexbuf in
|
||||||
|
let delta = thread.len - len in
|
||||||
|
let pos = state.pos#shift_one_uchar delta in
|
||||||
|
match status with
|
||||||
|
None -> scan_line thread {state with pos} lexbuf
|
||||||
|
| Some error ->
|
||||||
|
let region = Region.make ~start:state.pos ~stop:pos
|
||||||
|
in fail region error }
|
||||||
|
|
||||||
|
and scan_utf8 thread state = parse
|
||||||
|
eof { fail thread.opening Unterminated_comment }
|
||||||
|
| _ as c { let thread = push_char c thread in
|
||||||
|
let lexeme = Lexing.lexeme lexbuf in
|
||||||
|
let () = state.supply (Bytes.of_string lexeme) 0 1 in
|
||||||
|
match Uutf.decode state.decoder with
|
||||||
|
`Uchar _ -> thread, None
|
||||||
|
| `Malformed _ -> thread, Some Invalid_utf8_sequence
|
||||||
|
| `Await -> scan_utf8 thread state lexbuf
|
||||||
|
| `End -> assert false }
|
||||||
|
|
||||||
|
(* END LEXER DEFINITION *)
|
||||||
|
|
||||||
|
{
|
||||||
|
(* START TRAILER *)
|
||||||
|
|
||||||
|
(* Scanning the lexing buffer for tokens (and markup, as a
|
||||||
|
side-effect).
|
||||||
|
|
||||||
|
Because we want the lexer to have access to the right lexical
|
||||||
|
context of a recognised lexeme (to enforce stylistic constraints or
|
||||||
|
report special error patterns), we need to keep a hidden reference
|
||||||
|
to a queue of recognised lexical units (that is, tokens and markup)
|
||||||
|
that acts as a mutable state between the calls to
|
||||||
|
[read_token]. When [read_token] is called, that queue is consulted
|
||||||
|
first and, if it contains at least one token, that token is
|
||||||
|
returned; otherwise, the lexing buffer is scanned for at least one
|
||||||
|
more new token. That is the general principle: we put a high-level
|
||||||
|
buffer (our queue) on top of the low-level lexing buffer.
|
||||||
|
|
||||||
|
One tricky and important detail is that we must make any parser
|
||||||
|
generated by Menhir (and calling [read_token]) believe that the
|
||||||
|
last region of the input source that was matched indeed corresponds
|
||||||
|
to the returned token, despite that many tokens and markup may have
|
||||||
|
been matched since it was actually read from the input. In other
|
||||||
|
words, the parser requests a token that is taken from the
|
||||||
|
high-level buffer, but the parser requests the source regions from
|
||||||
|
the _low-level_ lexing buffer, and they may disagree if more than
|
||||||
|
one token has actually been recognised.
|
||||||
|
|
||||||
|
Consequently, in order to maintain a consistent view for the
|
||||||
|
parser, we have to patch some fields of the lexing buffer, namely
|
||||||
|
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
|
||||||
|
generated by Menhir when querying source positions (regions). This
|
||||||
|
is the purpose of the function [patch_buffer]. After reading one
|
||||||
|
ore more tokens and markup by the scanning rule [scan], we have to
|
||||||
|
save in the hidden reference [buf_reg] the region of the source
|
||||||
|
that was matched by [scan]. This atomic sequence of patching,
|
||||||
|
scanning and saving is implemented by the _function_ [scan]
|
||||||
|
(beware: it shadows the scanning rule [scan]). The function
|
||||||
|
[patch_buffer] is, of course, also called just before returning the
|
||||||
|
token, so the parser has a view of the lexing buffer consistent
|
||||||
|
with the token.
|
||||||
|
|
||||||
|
Note that an additional reference [first_call] is needed to
|
||||||
|
distinguish the first call to the function [scan], as the first
|
||||||
|
scanning rule is actually [init] (which can handle the BOM), not
|
||||||
|
[scan].
|
||||||
|
*)
|
||||||
|
|
||||||
|
type logger = Markup.t list -> token -> unit
|
||||||
|
|
||||||
|
type instance = {
|
||||||
|
read : ?log:logger -> Lexing.lexbuf -> token;
|
||||||
|
buffer : Lexing.lexbuf;
|
||||||
|
get_pos : unit -> Pos.t;
|
||||||
|
get_last : unit -> Region.t;
|
||||||
|
close : unit -> unit
|
||||||
|
}
|
||||||
|
|
||||||
|
let file_path = match EvalOpt.input with
|
||||||
|
None | Some "-" -> ""
|
||||||
|
| Some file_path -> file_path
|
||||||
|
let pos = Pos.min#set_file file_path
|
||||||
|
let buf_reg = ref (pos#byte, pos#byte)
|
||||||
|
and first_call = ref true
|
||||||
|
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual
|
||||||
|
let supply = Uutf.Manual.src decoder
|
||||||
|
let state = ref {units = FQueue.empty;
|
||||||
|
last = Region.ghost;
|
||||||
|
pos;
|
||||||
|
markup = [];
|
||||||
|
decoder;
|
||||||
|
supply}
|
||||||
|
|
||||||
|
let get_pos () = !state.pos
|
||||||
|
|
||||||
|
let get_last () = !state.last
|
||||||
|
|
||||||
|
let patch_buffer (start, stop) buffer =
|
||||||
|
let open Lexing in
|
||||||
|
let file_path = buffer.lex_curr_p.pos_fname in
|
||||||
|
buffer.lex_start_p <- {start with pos_fname = file_path};
|
||||||
|
buffer.lex_curr_p <- {stop with pos_fname = file_path}
|
||||||
|
|
||||||
|
and save_region buffer =
|
||||||
|
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p)
|
||||||
|
|
||||||
|
let scan buffer =
|
||||||
|
patch_buffer !buf_reg buffer;
|
||||||
|
(if !first_call
|
||||||
|
then (state := init !state buffer; first_call := false)
|
||||||
|
else state := scan !state buffer);
|
||||||
|
save_region buffer
|
||||||
|
|
||||||
|
let next_token buffer =
|
||||||
|
scan buffer;
|
||||||
|
match FQueue.peek !state.units with
|
||||||
|
None -> assert false
|
||||||
|
| Some (units, ext_token) ->
|
||||||
|
state := {!state with units}; Some ext_token
|
||||||
|
|
||||||
|
let check_right_context token buffer =
|
||||||
|
let open Token in
|
||||||
|
if is_int token || is_bytes token then
|
||||||
|
match next_token buffer with
|
||||||
|
Some ([], next) ->
|
||||||
|
let pos = (Token.to_region token)#stop in
|
||||||
|
let region = Region.make ~start:pos ~stop:pos in
|
||||||
|
if is_bytes token && is_int next then
|
||||||
|
fail region Odd_lengthed_bytes
|
||||||
|
else
|
||||||
|
if is_ident next || is_string next
|
||||||
|
|| is_bytes next || is_int next then
|
||||||
|
fail region Missing_break
|
||||||
|
| _ -> ()
|
||||||
|
else
|
||||||
|
if Token.is_ident token || Token.is_string token then
|
||||||
|
match next_token buffer with
|
||||||
|
Some ([], next) ->
|
||||||
|
if Token.is_ident next || Token.is_string next
|
||||||
|
|| Token.is_bytes next || Token.is_int next
|
||||||
|
then
|
||||||
|
let pos = (Token.to_region token)#stop in
|
||||||
|
let region = Region.make ~start:pos ~stop:pos
|
||||||
|
in fail region Missing_break
|
||||||
|
| _ -> ()
|
||||||
|
|
||||||
|
let rec read_token ?(log=fun _ _ -> ()) buffer =
|
||||||
|
match FQueue.deq !state.units with
|
||||||
|
None ->
|
||||||
|
scan buffer;
|
||||||
|
read_token ~log buffer
|
||||||
|
| Some (units, (left_mark, token)) ->
|
||||||
|
log left_mark token;
|
||||||
|
state := {!state with units; last = Token.to_region token};
|
||||||
|
check_right_context token buffer;
|
||||||
|
patch_buffer (Token.to_region token)#byte_pos buffer;
|
||||||
|
token
|
||||||
|
|
||||||
|
let open_token_stream file_path_opt =
|
||||||
|
let cin = match file_path_opt with
|
||||||
|
None | Some "-" -> stdin
|
||||||
|
| Some file_path -> open_in file_path in
|
||||||
|
let buffer = Lexing.from_channel cin in
|
||||||
|
let () = match file_path_opt with
|
||||||
|
None | Some "-" -> ()
|
||||||
|
| Some file_path -> reset ~file:file_path buffer
|
||||||
|
and close () = close_in cin in
|
||||||
|
{read = read_token; buffer; get_pos; get_last; close}
|
||||||
|
|
||||||
|
(* Standalone lexer for debugging purposes *)
|
||||||
|
|
||||||
|
(* Pretty-printing in a string the lexemes making up the markup
|
||||||
|
between two tokens, concatenated with the last lexeme itself. *)
|
||||||
|
|
||||||
|
let output_token ?(offsets=true) mode command
|
||||||
|
channel left_mark token : unit =
|
||||||
|
let output str = Printf.fprintf channel "%s%!" str in
|
||||||
|
let output_nl str = output (str ^ "\n") in
|
||||||
|
match command with
|
||||||
|
EvalOpt.Quiet -> ()
|
||||||
|
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl
|
||||||
|
| EvalOpt.Copy ->
|
||||||
|
let lexeme = Token.to_lexeme token
|
||||||
|
and apply acc markup = Markup.to_lexeme markup :: acc
|
||||||
|
in List.fold_left apply [lexeme] left_mark
|
||||||
|
|> String.concat "" |> output
|
||||||
|
| EvalOpt.Units ->
|
||||||
|
let abs_token = Token.to_string token ~offsets mode
|
||||||
|
and apply acc markup =
|
||||||
|
Markup.to_string markup ~offsets mode :: acc
|
||||||
|
in List.fold_left apply [abs_token] left_mark
|
||||||
|
|> String.concat "\n" |> output_nl
|
||||||
|
|
||||||
|
let print_error ?(offsets=true) mode Region.{region; value} =
|
||||||
|
let msg = error_to_string value in
|
||||||
|
let file = match EvalOpt.input with
|
||||||
|
None | Some "-" -> false
|
||||||
|
| Some _ -> true in
|
||||||
|
let reg = region#to_string ~file ~offsets mode in
|
||||||
|
Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg)
|
||||||
|
|
||||||
|
let trace ?(offsets=true) mode file_path_opt command : unit =
|
||||||
|
try
|
||||||
|
let {read; buffer; close; _} = open_token_stream file_path_opt
|
||||||
|
and cout = stdout in
|
||||||
|
let log = output_token ~offsets mode command cout
|
||||||
|
and close_all () = close (); close_out cout in
|
||||||
|
let rec iter () =
|
||||||
|
match read ~log buffer with
|
||||||
|
token ->
|
||||||
|
if Token.is_eof token then close_all ()
|
||||||
|
else iter ()
|
||||||
|
| exception Error e -> print_error ~offsets mode e; close_all ()
|
||||||
|
in iter ()
|
||||||
|
with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg)
|
||||||
|
|
||||||
|
end (* of functor [Make] in HEADER *)
|
||||||
|
(* END TRAILER *)
|
||||||
|
}
|
17
LexerMain.ml
Normal file
17
LexerMain.ml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
(* Driver for the lexer of Ligo *)
|
||||||
|
|
||||||
|
open! EvalOpt (* Reads the command-line options: Effectful! *)
|
||||||
|
|
||||||
|
(* Error printing and exception tracing *)
|
||||||
|
|
||||||
|
let () = Printexc.record_backtrace true
|
||||||
|
|
||||||
|
let external_ text =
|
||||||
|
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
|
||||||
|
|
||||||
|
(* Running the lexer on the input file *)
|
||||||
|
|
||||||
|
module Lexer = Lexer.Make (LexToken)
|
||||||
|
|
||||||
|
let () = Lexer.trace ~offsets:EvalOpt.offsets
|
||||||
|
EvalOpt.mode EvalOpt.input EvalOpt.cmd
|
6
MBytes.ml
Normal file
6
MBytes.ml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
(* TEMPORARY: SHOULD BE ERASED *)
|
||||||
|
|
||||||
|
type t = Hex.t
|
||||||
|
|
||||||
|
let of_hex x = x
|
||||||
|
let to_hex x = x
|
6
MBytes.mli
Normal file
6
MBytes.mli
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
(* TEMPORARY: SHOULD BE ERASED *)
|
||||||
|
|
||||||
|
type t
|
||||||
|
|
||||||
|
val of_hex : Hex.t -> t
|
||||||
|
val to_hex : t -> Hex.t
|
42
Markup.ml
Normal file
42
Markup.ml
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
type lexeme = string
|
||||||
|
|
||||||
|
type t =
|
||||||
|
Tabs of int Region.reg
|
||||||
|
| Space of int Region.reg
|
||||||
|
| Newline of lexeme Region.reg
|
||||||
|
| LineCom of lexeme Region.reg
|
||||||
|
| BlockCom of lexeme Region.reg
|
||||||
|
| BOM of lexeme Region.reg
|
||||||
|
|
||||||
|
type markup = t
|
||||||
|
|
||||||
|
(* Pretty-printing *)
|
||||||
|
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
|
let to_lexeme = function
|
||||||
|
Tabs Region.{value;_} -> String.make value '\t'
|
||||||
|
| Space Region.{value;_} -> String.make value ' '
|
||||||
|
| Newline Region.{value;_}
|
||||||
|
| LineCom Region.{value;_}
|
||||||
|
| BlockCom Region.{value;_}
|
||||||
|
| BOM Region.{value;_} -> value
|
||||||
|
|
||||||
|
let to_string markup ?(offsets=true) mode =
|
||||||
|
let region, val_str =
|
||||||
|
match markup with
|
||||||
|
Tabs Region.{value; region} ->
|
||||||
|
let lex = String.make value '\t' |> String.escaped
|
||||||
|
in region, sprintf "Tabs \"%s\"" lex
|
||||||
|
| Space Region.{value; region} ->
|
||||||
|
region, sprintf "Space \"%s\"" (String.make value ' ')
|
||||||
|
| Newline Region.{value; region} ->
|
||||||
|
region, sprintf "Newline \"%s\"" (String.escaped value)
|
||||||
|
| LineCom Region.{value; region} ->
|
||||||
|
region, sprintf "LineCom \"%s\"" (String.escaped value)
|
||||||
|
| BlockCom Region.{value; region} ->
|
||||||
|
region, sprintf "BlockCom \"%s\"" (String.escaped value)
|
||||||
|
| BOM Region.{value; region} ->
|
||||||
|
region, sprintf "BOM \"%s\"" (String.escaped value) in
|
||||||
|
let reg_str = region#compact ~offsets mode
|
||||||
|
in sprintf "%s: %s" reg_str val_str
|
32
Markup.mli
Normal file
32
Markup.mli
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
(* This module defines the sorts of markup recognised by the Ligo
|
||||||
|
lexer *)
|
||||||
|
|
||||||
|
(* A lexeme is piece of concrete syntax belonging to a token. In
|
||||||
|
algebraic terms, a token is also a piece of abstract lexical
|
||||||
|
syntax. Lexical units emcompass both markup and lexemes. *)
|
||||||
|
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
|
type t =
|
||||||
|
Tabs of int Region.reg (* Tabulations *)
|
||||||
|
| Space of int Region.reg (* Space *)
|
||||||
|
| Newline of lexeme Region.reg (* "\n" or "\c\r" escape characters *)
|
||||||
|
| LineCom of lexeme Region.reg (* Line comments *)
|
||||||
|
| BlockCom of lexeme Region.reg (* Block comments *)
|
||||||
|
| BOM of lexeme Region.reg (* Byte-Order Mark for UTF-8 (optional) *)
|
||||||
|
|
||||||
|
type markup = t
|
||||||
|
|
||||||
|
(* Pretty-printing of markup
|
||||||
|
|
||||||
|
The difference between [to_lexeme] and [to_string] is that the
|
||||||
|
former builds the corresponding concrete syntax (the lexeme),
|
||||||
|
whilst the latter makes up a textual representation of the abstract
|
||||||
|
syntax (the OCaml data constructors).
|
||||||
|
|
||||||
|
The result of [to_string] is escaped to avoid capture by the
|
||||||
|
terminal.
|
||||||
|
*)
|
||||||
|
|
||||||
|
val to_lexeme : t -> lexeme
|
||||||
|
val to_string : t -> ?offsets:bool -> [`Byte | `Point] -> string
|
87
ParToken.mly
Normal file
87
ParToken.mly
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
%{
|
||||||
|
%}
|
||||||
|
|
||||||
|
(* Tokens (mirroring thise defined in module LexToken) *)
|
||||||
|
|
||||||
|
(* Literals *)
|
||||||
|
|
||||||
|
%token <LexToken.lexeme Region.reg> String
|
||||||
|
%token <(LexToken.lexeme * MBytes.t) Region.reg> Bytes
|
||||||
|
%token <(LexToken.lexeme * Z.t) Region.reg> Int
|
||||||
|
%token <LexToken.lexeme Region.reg> Ident
|
||||||
|
%token <LexToken.lexeme Region.reg> Constr
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
%token <Region.t> SEMI
|
||||||
|
%token <Region.t> COMMA
|
||||||
|
%token <Region.t> LPAR
|
||||||
|
%token <Region.t> RPAR
|
||||||
|
%token <Region.t> LBRACE
|
||||||
|
%token <Region.t> RBRACE
|
||||||
|
%token <Region.t> LBRACKET
|
||||||
|
%token <Region.t> RBRACKET
|
||||||
|
%token <Region.t> CONS
|
||||||
|
%token <Region.t> VBAR
|
||||||
|
%token <Region.t> ARROW
|
||||||
|
%token <Region.t> ASGNMNT
|
||||||
|
%token <Region.t> EQUAL
|
||||||
|
%token <Region.t> COLON
|
||||||
|
%token <Region.t> OR
|
||||||
|
%token <Region.t> AND
|
||||||
|
%token <Region.t> LT
|
||||||
|
%token <Region.t> LEQ
|
||||||
|
%token <Region.t> GT
|
||||||
|
%token <Region.t> GEQ
|
||||||
|
%token <Region.t> NEQ
|
||||||
|
%token <Region.t> PLUS
|
||||||
|
%token <Region.t> MINUS
|
||||||
|
%token <Region.t> SLASH
|
||||||
|
%token <Region.t> TIMES
|
||||||
|
%token <Region.t> DOT
|
||||||
|
%token <Region.t> WILD
|
||||||
|
%token <Region.t> CAT
|
||||||
|
|
||||||
|
(* Keywords *)
|
||||||
|
|
||||||
|
%token <Region.t> Begin
|
||||||
|
%token <Region.t> Const
|
||||||
|
%token <Region.t> Down
|
||||||
|
%token <Region.t> If
|
||||||
|
%token <Region.t> In
|
||||||
|
%token <Region.t> Is
|
||||||
|
%token <Region.t> For
|
||||||
|
%token <Region.t> Function
|
||||||
|
%token <Region.t> Parameter
|
||||||
|
%token <Region.t> Storage
|
||||||
|
%token <Region.t> Type
|
||||||
|
%token <Region.t> Of
|
||||||
|
%token <Region.t> Operations
|
||||||
|
%token <Region.t> Var
|
||||||
|
%token <Region.t> End
|
||||||
|
%token <Region.t> Then
|
||||||
|
%token <Region.t> Else
|
||||||
|
%token <Region.t> Match
|
||||||
|
%token <Region.t> Null
|
||||||
|
%token <Region.t> Procedure
|
||||||
|
%token <Region.t> Record
|
||||||
|
%token <Region.t> Step
|
||||||
|
%token <Region.t> To
|
||||||
|
%token <Region.t> Mod
|
||||||
|
%token <Region.t> Not
|
||||||
|
%token <Region.t> While
|
||||||
|
%token <Region.t> With
|
||||||
|
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
|
%token <Region.t> C_False
|
||||||
|
%token <Region.t> C_None
|
||||||
|
%token <Region.t> C_Some
|
||||||
|
%token <Region.t> C_True
|
||||||
|
%token <Region.t> C_Unit
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
|
%token <Region.t> EOF
|
||||||
|
|
||||||
|
%%
|
627
Parser.mly
Normal file
627
Parser.mly
Normal file
@ -0,0 +1,627 @@
|
|||||||
|
%{
|
||||||
|
(* START HEADER *)
|
||||||
|
|
||||||
|
open Region
|
||||||
|
open AST
|
||||||
|
|
||||||
|
(* END HEADER *)
|
||||||
|
%}
|
||||||
|
|
||||||
|
(* Entry points *)
|
||||||
|
|
||||||
|
%start program
|
||||||
|
%type <AST.t> program
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
(* RULES *)
|
||||||
|
|
||||||
|
(* Compound constructs *)
|
||||||
|
|
||||||
|
par(X):
|
||||||
|
LPAR X RPAR {
|
||||||
|
let region = cover $1 $3
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
braces(X):
|
||||||
|
LBRACE X RBRACE {
|
||||||
|
let region = cover $1 $3
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
brackets(X):
|
||||||
|
LBRACKET X RBRACKET {
|
||||||
|
let region = cover $1 $3
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
(* Sequences
|
||||||
|
|
||||||
|
Series of instances of the same syntactical category have often to
|
||||||
|
be parsed, like lists of expressions, patterns etc. The simplest of
|
||||||
|
all is the possibly empty sequence (series), parsed below by
|
||||||
|
[seq]. The non-empty sequence is parsed by [nseq]. Note that the
|
||||||
|
latter returns a pair made of the first parsed item (the parameter
|
||||||
|
[X]) and the rest of the sequence (possibly empty). This way, the
|
||||||
|
OCaml typechecker can keep track of this information along the
|
||||||
|
static control-flow graph. The rule [sepseq] parses possibly empty
|
||||||
|
sequences of items separated by some token (e.g., a comma), and
|
||||||
|
rule [nsepseq] is for non-empty such sequences. See module [Utils]
|
||||||
|
for the types corresponding to the semantic actions of those rules.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* Possibly empty sequence of items *)
|
||||||
|
|
||||||
|
seq(X):
|
||||||
|
(**) { [] }
|
||||||
|
| X seq(X) { $1::$2 }
|
||||||
|
|
||||||
|
(* Non-empty sequence of items *)
|
||||||
|
|
||||||
|
nseq(X):
|
||||||
|
X seq(X) { $1,$2 }
|
||||||
|
|
||||||
|
(* Non-empty separated sequence of items *)
|
||||||
|
|
||||||
|
nsepseq(X,Sep):
|
||||||
|
X { $1, [] }
|
||||||
|
| X Sep nsepseq(X,Sep) { let h,t = $3 in $1, ($2,h)::t }
|
||||||
|
|
||||||
|
(* Possibly empy separated sequence of items *)
|
||||||
|
|
||||||
|
sepseq(X,Sep):
|
||||||
|
(**) { None }
|
||||||
|
| nsepseq(X,Sep) { Some $1 }
|
||||||
|
|
||||||
|
(* Inlines *)
|
||||||
|
|
||||||
|
%inline var : Ident { $1 }
|
||||||
|
%inline type_name : Ident { $1 }
|
||||||
|
%inline fun_name : Ident { $1 }
|
||||||
|
%inline field_name : Ident { $1 }
|
||||||
|
%inline map_name : Ident { $1 }
|
||||||
|
|
||||||
|
(* Main *)
|
||||||
|
|
||||||
|
program:
|
||||||
|
seq(type_decl)
|
||||||
|
parameter_decl
|
||||||
|
storage_decl
|
||||||
|
operations_decl
|
||||||
|
seq(lambda_decl)
|
||||||
|
block
|
||||||
|
EOF {
|
||||||
|
object
|
||||||
|
method types = $1
|
||||||
|
method parameter = $2
|
||||||
|
method storage = $3
|
||||||
|
method operations = $4
|
||||||
|
method lambdas = $5
|
||||||
|
method block = $6
|
||||||
|
method eof = $7
|
||||||
|
end
|
||||||
|
}
|
||||||
|
|
||||||
|
parameter_decl:
|
||||||
|
Parameter var COLON type_expr {
|
||||||
|
let stop = type_expr_to_region $4
|
||||||
|
in {region = cover $1 stop;
|
||||||
|
value = $1,$2,$3,$4}
|
||||||
|
}
|
||||||
|
|
||||||
|
storage_decl:
|
||||||
|
Storage type_expr {
|
||||||
|
let stop = type_expr_to_region $2
|
||||||
|
in {region = cover $1 stop;
|
||||||
|
value = $1,$2}
|
||||||
|
}
|
||||||
|
|
||||||
|
operations_decl:
|
||||||
|
Operations type_expr {
|
||||||
|
let stop = type_expr_to_region $2
|
||||||
|
in {region = cover $1 stop;
|
||||||
|
value = $1,$2}
|
||||||
|
}
|
||||||
|
|
||||||
|
(* Type declarations *)
|
||||||
|
|
||||||
|
type_decl:
|
||||||
|
Type type_name Is type_expr {
|
||||||
|
{region = cover $1 (type_expr_to_region $4);
|
||||||
|
value = $1,$2,$3,$4}
|
||||||
|
}
|
||||||
|
|
||||||
|
type_expr:
|
||||||
|
cartesian { Prod $1 }
|
||||||
|
| sum_type { Sum $1 }
|
||||||
|
| record_type { Record $1 }
|
||||||
|
|
||||||
|
cartesian:
|
||||||
|
nsepseq(core_type,TIMES) {
|
||||||
|
let region = nsepseq_to_region type_expr_to_region $1
|
||||||
|
in {region; value=$1}
|
||||||
|
}
|
||||||
|
|
||||||
|
core_type:
|
||||||
|
type_name {
|
||||||
|
TAlias $1
|
||||||
|
}
|
||||||
|
| type_name type_tuple {
|
||||||
|
let region = cover $1.region $2.region
|
||||||
|
in TypeApp {region; value = $1,$2}
|
||||||
|
}
|
||||||
|
| par(type_expr) {
|
||||||
|
ParType $1
|
||||||
|
}
|
||||||
|
|
||||||
|
type_tuple:
|
||||||
|
par(nsepseq(type_name,COMMA)) { $1 }
|
||||||
|
|
||||||
|
sum_type:
|
||||||
|
nsepseq(variant,VBAR) {
|
||||||
|
let region = nsepseq_to_region (fun x -> x.region) $1
|
||||||
|
in {region; value=$1}
|
||||||
|
}
|
||||||
|
|
||||||
|
variant:
|
||||||
|
Constr Of cartesian {
|
||||||
|
let region = cover $1.region $3.region
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
record_type:
|
||||||
|
Record
|
||||||
|
nsepseq(field_decl,SEMI)
|
||||||
|
End {
|
||||||
|
let region = cover $1 $3
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
field_decl:
|
||||||
|
field_name COLON type_expr {
|
||||||
|
let stop = type_expr_to_region $3 in
|
||||||
|
let region = cover $1.region stop
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
(* Function and procedure declarations *)
|
||||||
|
|
||||||
|
lambda_decl:
|
||||||
|
fun_decl { FunDecl $1 }
|
||||||
|
| proc_decl { ProcDecl $1 }
|
||||||
|
|
||||||
|
fun_decl:
|
||||||
|
Function fun_name parameters COLON type_expr Is
|
||||||
|
block
|
||||||
|
With expr {
|
||||||
|
let region = cover $1 (expr_to_region $9) in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kwd_function = $1
|
||||||
|
method var = $2
|
||||||
|
method param = $3
|
||||||
|
method colon = $4
|
||||||
|
method ret_type = $5
|
||||||
|
method kwd_is = $6
|
||||||
|
method body = $7
|
||||||
|
method kwd_with = $8
|
||||||
|
method return = $9
|
||||||
|
end
|
||||||
|
in {region; value}
|
||||||
|
}
|
||||||
|
|
||||||
|
proc_decl:
|
||||||
|
Procedure fun_name parameters Is
|
||||||
|
block {
|
||||||
|
let region = cover $1 $5.region in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kwd_procedure = $1
|
||||||
|
method var = $2
|
||||||
|
method param = $3
|
||||||
|
method kwd_is = $4
|
||||||
|
method body = $5
|
||||||
|
end
|
||||||
|
in {region; value}
|
||||||
|
}
|
||||||
|
|
||||||
|
parameters:
|
||||||
|
par(nsepseq(param_decl,SEMI)) { $1 }
|
||||||
|
|
||||||
|
param_decl:
|
||||||
|
var_kind var COLON type_expr {
|
||||||
|
let start = var_kind_to_region $1 in
|
||||||
|
let stop = type_expr_to_region $4 in
|
||||||
|
let region = cover start stop
|
||||||
|
in {region; value = $1,$2,$3,$4}
|
||||||
|
}
|
||||||
|
|
||||||
|
var_kind:
|
||||||
|
Var { Mutable $1 }
|
||||||
|
| Const { Const $1 }
|
||||||
|
|
||||||
|
block:
|
||||||
|
value_decls
|
||||||
|
Begin
|
||||||
|
instructions
|
||||||
|
End {
|
||||||
|
let region = cover $1.region $4 in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method decls = $1
|
||||||
|
method opening = $2
|
||||||
|
method instr = $3
|
||||||
|
method close = $4
|
||||||
|
end
|
||||||
|
in {region; value}
|
||||||
|
}
|
||||||
|
|
||||||
|
value_decls:
|
||||||
|
sepseq(var_decl,SEMI) {
|
||||||
|
let region = sepseq_to_region (fun x -> x.region) $1
|
||||||
|
in {region; value=$1}
|
||||||
|
}
|
||||||
|
|
||||||
|
var_decl:
|
||||||
|
Var var COLON type_expr ASGNMNT expr {
|
||||||
|
let region = cover $1 (expr_to_region $6) in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kind = Mutable $1
|
||||||
|
method var = $2
|
||||||
|
method colon = $3
|
||||||
|
method vtype = $4
|
||||||
|
method setter = $5
|
||||||
|
method init = $6
|
||||||
|
end
|
||||||
|
in {region; value}
|
||||||
|
}
|
||||||
|
| Const var COLON type_expr EQUAL expr {
|
||||||
|
let region = cover $1 (expr_to_region $6) in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kind = Const $1
|
||||||
|
method var = $2
|
||||||
|
method colon = $3
|
||||||
|
method vtype = $4
|
||||||
|
method setter = $5
|
||||||
|
method init = $6
|
||||||
|
end
|
||||||
|
in {region; value}
|
||||||
|
}
|
||||||
|
|
||||||
|
instructions:
|
||||||
|
nsepseq(instruction,SEMI) {
|
||||||
|
let region = nsepseq_to_region instr_to_region $1
|
||||||
|
in {region; value=$1}
|
||||||
|
}
|
||||||
|
|
||||||
|
instruction:
|
||||||
|
single_instr { Single $1 }
|
||||||
|
| block { Block $1 }
|
||||||
|
|
||||||
|
single_instr:
|
||||||
|
conditional { Cond $1 }
|
||||||
|
| match_instr { Match $1 }
|
||||||
|
| asgnmnt { Asgnmnt $1 }
|
||||||
|
| loop { Loop $1 }
|
||||||
|
| proc_call { ProcCall $1 }
|
||||||
|
| Null { Null $1 }
|
||||||
|
|
||||||
|
proc_call:
|
||||||
|
fun_call { $1 }
|
||||||
|
|
||||||
|
conditional:
|
||||||
|
If expr Then instruction Else instruction {
|
||||||
|
let region = cover $1 (instr_to_region $6) in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kwd_if = $1
|
||||||
|
method test = $2
|
||||||
|
method kwd_then = $3
|
||||||
|
method ifso = $4
|
||||||
|
method kwd_else = $5
|
||||||
|
method ifnot = $6
|
||||||
|
end
|
||||||
|
in {region; value}
|
||||||
|
}
|
||||||
|
|
||||||
|
match_instr:
|
||||||
|
Match expr With cases End {
|
||||||
|
let region = cover $1 $5 in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kwd_match = $1
|
||||||
|
method expr = $2
|
||||||
|
method kwd_with = $3
|
||||||
|
method cases = $4
|
||||||
|
method kwd_end = $5
|
||||||
|
end
|
||||||
|
in {region; value}
|
||||||
|
}
|
||||||
|
|
||||||
|
cases:
|
||||||
|
nsepseq(case,VBAR) {
|
||||||
|
let region = nsepseq_to_region (fun x -> x.region) $1
|
||||||
|
in {region; value=$1}
|
||||||
|
}
|
||||||
|
|
||||||
|
case:
|
||||||
|
pattern ARROW instruction {
|
||||||
|
let region = cover $1.region (instr_to_region $3)
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
asgnmnt:
|
||||||
|
var ASGNMNT expr {
|
||||||
|
let region = cover $1.region (expr_to_region $3)
|
||||||
|
in {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
loop:
|
||||||
|
while_loop { $1 }
|
||||||
|
| for_loop { $1 }
|
||||||
|
|
||||||
|
while_loop:
|
||||||
|
While expr block {
|
||||||
|
let region = cover $1 $3.region
|
||||||
|
in While {region; value=$1,$2,$3}
|
||||||
|
}
|
||||||
|
|
||||||
|
for_loop:
|
||||||
|
For asgnmnt Down? To expr option(step_clause) block {
|
||||||
|
let region = cover $1 $7.region in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kwd_for = $1
|
||||||
|
method asgnmnt = $2
|
||||||
|
method down = $3
|
||||||
|
method kwd_to = $4
|
||||||
|
method bound = $5
|
||||||
|
method step = $6
|
||||||
|
method block = $7
|
||||||
|
end
|
||||||
|
in For (ForInt {region; value})
|
||||||
|
}
|
||||||
|
|
||||||
|
| For var option(arrow_clause) In expr block {
|
||||||
|
let region = cover $1 $6.region in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method kwd_for = $1
|
||||||
|
method var = $2
|
||||||
|
method bind_to = $3
|
||||||
|
method kwd_in = $4
|
||||||
|
method expr = $5
|
||||||
|
method block = $6
|
||||||
|
end
|
||||||
|
in For (ForCollect {region; value})
|
||||||
|
}
|
||||||
|
|
||||||
|
step_clause:
|
||||||
|
Step expr { $1,$2 }
|
||||||
|
|
||||||
|
arrow_clause:
|
||||||
|
ARROW var { $1,$2 }
|
||||||
|
|
||||||
|
(* Expressions *)
|
||||||
|
|
||||||
|
expr:
|
||||||
|
expr OR conj_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Or {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| conj_expr { $1 }
|
||||||
|
|
||||||
|
conj_expr:
|
||||||
|
conj_expr AND comp_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
And {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| comp_expr { $1 }
|
||||||
|
|
||||||
|
comp_expr:
|
||||||
|
comp_expr LT cat_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Lt {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| comp_expr LEQ cat_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Leq {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| comp_expr GT cat_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Gt {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| comp_expr GEQ cat_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Geq {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| comp_expr EQUAL cat_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Equal {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| comp_expr NEQ cat_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Neq {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| cat_expr { $1 }
|
||||||
|
|
||||||
|
cat_expr:
|
||||||
|
cons_expr CAT cat_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Cat {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| cons_expr { $1 }
|
||||||
|
|
||||||
|
cons_expr:
|
||||||
|
add_expr CONS cons_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Cons {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| add_expr { $1 }
|
||||||
|
|
||||||
|
add_expr:
|
||||||
|
add_expr PLUS mult_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Add {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| add_expr MINUS mult_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Sub {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| mult_expr { $1 }
|
||||||
|
|
||||||
|
mult_expr:
|
||||||
|
mult_expr TIMES unary_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Mult {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| mult_expr SLASH unary_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Div {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| mult_expr Mod unary_expr {
|
||||||
|
let start = expr_to_region $1
|
||||||
|
and stop = expr_to_region $3 in
|
||||||
|
let region = cover start stop in
|
||||||
|
Mod {region; value = $1,$2,$3}
|
||||||
|
}
|
||||||
|
| unary_expr { $1 }
|
||||||
|
|
||||||
|
unary_expr:
|
||||||
|
MINUS core_expr {
|
||||||
|
let stop = expr_to_region $2 in
|
||||||
|
let region = cover $1 stop in
|
||||||
|
Neg {region; value = $1,$2}
|
||||||
|
}
|
||||||
|
| Not core_expr {
|
||||||
|
let stop = expr_to_region $2 in
|
||||||
|
let region = cover $1 stop in
|
||||||
|
Not {region; value = $1,$2}
|
||||||
|
}
|
||||||
|
| core_expr { $1 }
|
||||||
|
|
||||||
|
core_expr:
|
||||||
|
Int { Int $1 }
|
||||||
|
| var { Var $1 }
|
||||||
|
| String { String $1 }
|
||||||
|
| Bytes { Bytes $1 }
|
||||||
|
| C_False { False $1 }
|
||||||
|
| C_True { True $1 }
|
||||||
|
| C_Unit { Unit $1 }
|
||||||
|
| tuple { Tuple $1 }
|
||||||
|
| list_expr { List $1 }
|
||||||
|
| empty_list { EmptyList $1 }
|
||||||
|
| set_expr { Set $1 }
|
||||||
|
| empty_set { EmptySet $1 }
|
||||||
|
| none_expr { NoneExpr $1 }
|
||||||
|
| fun_call { FunCall $1 }
|
||||||
|
| Constr arguments {
|
||||||
|
let region = cover $1.region $2.region in
|
||||||
|
ConstrApp {region; value = $1,$2}
|
||||||
|
}
|
||||||
|
| C_Some arguments {
|
||||||
|
let region = cover $1 $2.region in
|
||||||
|
SomeApp {region; value = $1,$2}
|
||||||
|
}
|
||||||
|
| map_name DOT brackets(expr) {
|
||||||
|
let region = cover $1.region $3.region in
|
||||||
|
let value =
|
||||||
|
object
|
||||||
|
method map_name = $1
|
||||||
|
method selector = $2
|
||||||
|
method index = $3
|
||||||
|
end
|
||||||
|
in MapLookUp {region; value}
|
||||||
|
}
|
||||||
|
|
||||||
|
fun_call:
|
||||||
|
fun_name arguments {
|
||||||
|
let region = cover $1.region $2.region
|
||||||
|
in {region; value = $1,$2}
|
||||||
|
}
|
||||||
|
|
||||||
|
tuple:
|
||||||
|
par(nsepseq(expr,COMMA)) { $1 }
|
||||||
|
|
||||||
|
arguments:
|
||||||
|
tuple { $1 }
|
||||||
|
|
||||||
|
list_expr:
|
||||||
|
brackets(nsepseq(expr,COMMA)) { $1 }
|
||||||
|
|
||||||
|
empty_list:
|
||||||
|
par(LBRACKET RBRACKET COLON type_expr { $1,$2,$3,$4 }) { $1 }
|
||||||
|
|
||||||
|
set_expr:
|
||||||
|
braces(nsepseq(expr,COMMA)) { $1 }
|
||||||
|
|
||||||
|
empty_set:
|
||||||
|
par(LBRACE RBRACE COLON type_expr { $1,$2,$3,$4 }) { $1 }
|
||||||
|
|
||||||
|
none_expr:
|
||||||
|
par(C_None COLON type_expr { $1,$2,$3 }) { $1 }
|
||||||
|
|
||||||
|
(* Patterns *)
|
||||||
|
|
||||||
|
pattern:
|
||||||
|
nsepseq(core_pattern,CONS) {
|
||||||
|
let region = nsepseq_to_region core_pattern_to_region $1
|
||||||
|
in {region; value=$1}
|
||||||
|
}
|
||||||
|
|
||||||
|
core_pattern:
|
||||||
|
var { PVar $1 }
|
||||||
|
| WILD { PWild $1 }
|
||||||
|
| Int { PInt $1 }
|
||||||
|
| String { PString $1 }
|
||||||
|
| C_Unit { PUnit $1 }
|
||||||
|
| C_False { PFalse $1 }
|
||||||
|
| C_True { PTrue $1 }
|
||||||
|
| C_None { PNone $1 }
|
||||||
|
| list_patt { PList $1 }
|
||||||
|
| tuple_patt { PTuple $1 }
|
||||||
|
| C_Some par(core_pattern) {
|
||||||
|
let region = cover $1 $2.region
|
||||||
|
in PSome {region; value = $1,$2}
|
||||||
|
}
|
||||||
|
|
||||||
|
list_patt:
|
||||||
|
brackets(sepseq(core_pattern,COMMA)) { Sugar $1 }
|
||||||
|
| par(cons_pattern) { Raw $1 }
|
||||||
|
|
||||||
|
cons_pattern:
|
||||||
|
core_pattern CONS pattern { $1,$2,$3 }
|
||||||
|
|
||||||
|
tuple_patt:
|
||||||
|
par(nsepseq(core_pattern,COMMA)) { $1 }
|
71
ParserMain.ml
Normal file
71
ParserMain.ml
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
(* Driver for the parser of Ligo *)
|
||||||
|
|
||||||
|
open! EvalOpt (* Reads the command-line options: Effectful! *)
|
||||||
|
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
|
(* Error printing and exception tracing *)
|
||||||
|
|
||||||
|
let () = Printexc.record_backtrace true
|
||||||
|
|
||||||
|
let external_ text =
|
||||||
|
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
|
||||||
|
|
||||||
|
type Error.t += ParseError
|
||||||
|
|
||||||
|
let error_to_string = function
|
||||||
|
ParseError -> "Syntax error.\n"
|
||||||
|
| _ -> assert false
|
||||||
|
|
||||||
|
let print_error ?(offsets=true) mode Region.{region; value} =
|
||||||
|
let msg = error_to_string value in
|
||||||
|
let file = match EvalOpt.input with
|
||||||
|
None | Some "-" -> false
|
||||||
|
| Some _ -> true in
|
||||||
|
let reg = region#to_string ~file ~offsets mode in
|
||||||
|
Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg)
|
||||||
|
|
||||||
|
(* Path to the Ligo standard library *)
|
||||||
|
(*
|
||||||
|
let lib_path =
|
||||||
|
match EvalOpt.libs with
|
||||||
|
[] -> ""
|
||||||
|
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
|
||||||
|
in List.fold_right mk_I libs ""
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* Instanciating the lexer *)
|
||||||
|
|
||||||
|
module Lexer = Lexer.Make (LexToken)
|
||||||
|
|
||||||
|
let Lexer.{read; buffer; get_pos; get_last; close} =
|
||||||
|
Lexer.open_token_stream EvalOpt.input
|
||||||
|
|
||||||
|
and cout = stdout
|
||||||
|
|
||||||
|
let log = Lexer.output_token ~offsets:EvalOpt.offsets
|
||||||
|
EvalOpt.mode EvalOpt.cmd cout
|
||||||
|
|
||||||
|
and close_all () = close (); close_out cout
|
||||||
|
|
||||||
|
(* Tokeniser *)
|
||||||
|
|
||||||
|
let tokeniser = read ~log
|
||||||
|
|
||||||
|
(* Main *)
|
||||||
|
|
||||||
|
let () =
|
||||||
|
try
|
||||||
|
let ast = Parser.program tokeniser buffer in
|
||||||
|
if Utils.String.Set.mem "parser" EvalOpt.verbose
|
||||||
|
then AST.print_tokens ast
|
||||||
|
with
|
||||||
|
Lexer.Error err ->
|
||||||
|
close_all ();
|
||||||
|
Lexer.print_error ~offsets EvalOpt.mode err
|
||||||
|
| Parser.Error ->
|
||||||
|
let region = get_last () in
|
||||||
|
let error = Region.{region; value=ParseError} in
|
||||||
|
let () = close_all () in
|
||||||
|
print_error ~offsets EvalOpt.mode error
|
||||||
|
| Sys_error msg -> Utils.highlight msg
|
124
Pos.ml
Normal file
124
Pos.ml
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
type t = <
|
||||||
|
byte : Lexing.position;
|
||||||
|
point_num : int;
|
||||||
|
point_bol : int;
|
||||||
|
file : string;
|
||||||
|
line : int;
|
||||||
|
|
||||||
|
set_file : string -> t;
|
||||||
|
set_line : int -> t;
|
||||||
|
new_line : string -> t;
|
||||||
|
add_nl : t;
|
||||||
|
|
||||||
|
shift_bytes : int -> t;
|
||||||
|
shift_one_uchar : int -> t;
|
||||||
|
|
||||||
|
offset : [`Byte | `Point] -> int;
|
||||||
|
column : [`Byte | `Point] -> int;
|
||||||
|
|
||||||
|
line_offset : [`Byte | `Point] -> int;
|
||||||
|
byte_offset : int;
|
||||||
|
|
||||||
|
is_ghost : bool;
|
||||||
|
|
||||||
|
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||||
|
compact : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||||
|
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
|
||||||
|
>
|
||||||
|
|
||||||
|
type pos = t
|
||||||
|
|
||||||
|
(* Constructors *)
|
||||||
|
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
|
let make ~byte ~point_num ~point_bol =
|
||||||
|
let () = assert (point_num >= point_bol) in
|
||||||
|
object (self)
|
||||||
|
val byte = byte
|
||||||
|
method byte = byte
|
||||||
|
|
||||||
|
val point_num = point_num
|
||||||
|
method point_num = point_num
|
||||||
|
|
||||||
|
val point_bol = point_bol
|
||||||
|
method point_bol = point_bol
|
||||||
|
|
||||||
|
method set_file file = {< byte = Lexing.{byte with pos_fname = file} >}
|
||||||
|
method set_line line = {< byte = Lexing.{byte with pos_lnum = line} >}
|
||||||
|
|
||||||
|
(* The string must not contain '\n'. See [new_line]. *)
|
||||||
|
|
||||||
|
method shift_bytes len =
|
||||||
|
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
|
||||||
|
point_num = point_num + len >}
|
||||||
|
|
||||||
|
method shift_one_uchar len =
|
||||||
|
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
|
||||||
|
point_num = point_num + 1 >}
|
||||||
|
|
||||||
|
method add_nl =
|
||||||
|
{< byte = Lexing.{byte with
|
||||||
|
pos_lnum = byte.pos_lnum + 1;
|
||||||
|
pos_bol = byte.pos_cnum};
|
||||||
|
point_bol = point_num >}
|
||||||
|
|
||||||
|
method new_line string =
|
||||||
|
let len = String.length string
|
||||||
|
in (self#shift_bytes len)#add_nl
|
||||||
|
|
||||||
|
method is_ghost = byte = Lexing.dummy_pos
|
||||||
|
|
||||||
|
method file = byte.Lexing.pos_fname
|
||||||
|
|
||||||
|
method line = byte.Lexing.pos_lnum
|
||||||
|
|
||||||
|
method offset = function
|
||||||
|
`Byte -> Lexing.(byte.pos_cnum - byte.pos_bol)
|
||||||
|
| `Point -> point_num - point_bol
|
||||||
|
|
||||||
|
method column mode = 1 + self#offset mode
|
||||||
|
|
||||||
|
method line_offset = function
|
||||||
|
`Byte -> byte.Lexing.pos_bol
|
||||||
|
| `Point -> point_bol
|
||||||
|
|
||||||
|
method byte_offset = byte.Lexing.pos_cnum
|
||||||
|
|
||||||
|
method to_string ?(offsets=true) mode =
|
||||||
|
let offset = self#offset mode in
|
||||||
|
let horizontal, value =
|
||||||
|
if offsets then "character", offset else "column", offset + 1
|
||||||
|
in sprintf "File \"%s\", line %i, %s %i"
|
||||||
|
self#file self#line horizontal value
|
||||||
|
|
||||||
|
method compact ?(offsets=true) mode =
|
||||||
|
if self#is_ghost then "ghost"
|
||||||
|
else
|
||||||
|
let offset = self#offset mode in
|
||||||
|
sprintf "%s:%i:%i"
|
||||||
|
self#file self#line (if offsets then offset else offset + 1)
|
||||||
|
|
||||||
|
method anonymous ?(offsets=true) mode =
|
||||||
|
if self#is_ghost then "ghost"
|
||||||
|
else sprintf "%i:%i" self#line
|
||||||
|
(if offsets then self#offset mode else self#column mode)
|
||||||
|
end
|
||||||
|
|
||||||
|
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1)
|
||||||
|
|
||||||
|
let min =
|
||||||
|
let byte = Lexing.{
|
||||||
|
pos_fname = "";
|
||||||
|
pos_lnum = 1;
|
||||||
|
pos_bol = 0;
|
||||||
|
pos_cnum = 0}
|
||||||
|
in make ~byte ~point_num:0 ~point_bol:0
|
||||||
|
|
||||||
|
(* Comparisons *)
|
||||||
|
|
||||||
|
let equal pos1 pos2 =
|
||||||
|
pos1#file = pos2#file && pos1#byte_offset = pos2#byte_offset
|
||||||
|
|
||||||
|
let lt pos1 pos2 =
|
||||||
|
pos1#file = pos2#file && pos1#byte_offset < pos2#byte_offset
|
105
Pos.mli
Normal file
105
Pos.mli
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
(* Positions in a file
|
||||||
|
|
||||||
|
A position in a file denotes a single unit belonging to it, for
|
||||||
|
example, in an ASCII text file, it is a particular character within
|
||||||
|
that file (the unit is the byte in this instance, since in ASCII
|
||||||
|
one character is encoded with one byte).
|
||||||
|
|
||||||
|
Units can be either bytes (as ASCII characters) or, more
|
||||||
|
generally, unicode points.
|
||||||
|
|
||||||
|
The type for positions is the object type [t].
|
||||||
|
|
||||||
|
We use here lexing positions to denote byte-oriented positions
|
||||||
|
(field [byte]), and we manage code points by means of the fields
|
||||||
|
[point_num] and [point_bol]. These two fields have a meaning
|
||||||
|
similar to the fields [pos_cnum] and [pos_bol], respectively, from
|
||||||
|
the standard module [Lexing]. That is to say, [point_num] holds the
|
||||||
|
number of code points since the beginning of the file, and
|
||||||
|
[point_bol] the number of code points since the beginning of the
|
||||||
|
current line.
|
||||||
|
|
||||||
|
The name of the file is given by the field [file], and the line
|
||||||
|
number by the field [line].
|
||||||
|
*)
|
||||||
|
|
||||||
|
type t = <
|
||||||
|
(* Payload *)
|
||||||
|
|
||||||
|
byte : Lexing.position;
|
||||||
|
point_num : int;
|
||||||
|
point_bol : int;
|
||||||
|
file : string;
|
||||||
|
line : int;
|
||||||
|
|
||||||
|
(* Setters *)
|
||||||
|
|
||||||
|
set_file : string -> t;
|
||||||
|
set_line : int -> t;
|
||||||
|
|
||||||
|
(* The call [pos#new_line s], where the string [s] is either "\n" or
|
||||||
|
"\c\r", updates the position [pos] with a new line. *)
|
||||||
|
|
||||||
|
new_line : string -> t;
|
||||||
|
add_nl : t;
|
||||||
|
|
||||||
|
(* The call [pos#shift_bytes n] evaluates in a position that is the
|
||||||
|
translation of position [pos] of [n] bytes forward in the
|
||||||
|
file. The call [pos#shift_one_uchar n] is similar, except that it
|
||||||
|
assumes that [n] is the number of bytes making up one unicode
|
||||||
|
point. *)
|
||||||
|
|
||||||
|
shift_bytes : int -> t;
|
||||||
|
shift_one_uchar : int -> t;
|
||||||
|
|
||||||
|
(* Getters *)
|
||||||
|
|
||||||
|
(* The call [pos#offset `Byte] provides the horizontal offset of the
|
||||||
|
position [pos] in bytes. (An offset is the number of units, like
|
||||||
|
bytes, since the beginning of the current line.) The call
|
||||||
|
[pos#offset `Point] is the offset counted in number of unicode
|
||||||
|
points.
|
||||||
|
|
||||||
|
The calls to the method [column] are similar to those to
|
||||||
|
[offset], except that they give the curren column number.
|
||||||
|
|
||||||
|
The call [pos#line_offset `Byte] is the offset of the line of
|
||||||
|
position [pos], counted in bytes. Dually, [pos#line_offset
|
||||||
|
`Point] counts the same offset in code points.
|
||||||
|
|
||||||
|
The call [pos#byte_offset] is the offset of the position [pos]
|
||||||
|
since the begininng of the file, counted in bytes.
|
||||||
|
*)
|
||||||
|
|
||||||
|
offset : [`Byte | `Point] -> int;
|
||||||
|
column : [`Byte | `Point] -> int;
|
||||||
|
|
||||||
|
line_offset : [`Byte | `Point] -> int;
|
||||||
|
byte_offset : int;
|
||||||
|
|
||||||
|
(* Predicates *)
|
||||||
|
|
||||||
|
is_ghost : bool;
|
||||||
|
|
||||||
|
(* Conversions to [string] *)
|
||||||
|
|
||||||
|
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||||
|
compact : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||||
|
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
|
||||||
|
>
|
||||||
|
|
||||||
|
type pos = t
|
||||||
|
|
||||||
|
(* Constructors *)
|
||||||
|
|
||||||
|
val make : byte:Lexing.position -> point_num:int -> point_bol:int -> t
|
||||||
|
|
||||||
|
(* Special positions *)
|
||||||
|
|
||||||
|
val ghost : t (* Same as [Lexing.dummy_pos] *)
|
||||||
|
val min : t (* Lexing convention: line 1, offsets to 0 and file to "". *)
|
||||||
|
|
||||||
|
(* Comparisons *)
|
||||||
|
|
||||||
|
val equal : t -> t -> bool
|
||||||
|
val lt : t -> t -> bool
|
122
Region.ml
Normal file
122
Region.ml
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
(* Regions of a file *)
|
||||||
|
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
|
type t = <
|
||||||
|
start : Pos.t;
|
||||||
|
stop : Pos.t;
|
||||||
|
|
||||||
|
(* Setters *)
|
||||||
|
|
||||||
|
shift_bytes : int -> t;
|
||||||
|
shift_one_uchar : int -> t;
|
||||||
|
|
||||||
|
(* Getters *)
|
||||||
|
|
||||||
|
file : string;
|
||||||
|
pos : Pos.t * Pos.t;
|
||||||
|
byte_pos : Lexing.position * Lexing.position;
|
||||||
|
|
||||||
|
(* Predicates *)
|
||||||
|
|
||||||
|
is_ghost : bool;
|
||||||
|
|
||||||
|
(* Conversions to [string] *)
|
||||||
|
|
||||||
|
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
|
||||||
|
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||||
|
>
|
||||||
|
|
||||||
|
type region = t
|
||||||
|
|
||||||
|
type 'a reg = {region: t; value: 'a}
|
||||||
|
|
||||||
|
(* Injections *)
|
||||||
|
|
||||||
|
exception Invalid
|
||||||
|
|
||||||
|
let make ~(start: Pos.t) ~(stop: Pos.t) =
|
||||||
|
if start#file <> stop#file || start#byte_offset > stop#byte_offset
|
||||||
|
then raise Invalid
|
||||||
|
else
|
||||||
|
object
|
||||||
|
val start = start
|
||||||
|
method start = start
|
||||||
|
val stop = stop
|
||||||
|
method stop = stop
|
||||||
|
|
||||||
|
method shift_bytes len =
|
||||||
|
let start = start#shift_bytes len
|
||||||
|
and stop = stop#shift_bytes len
|
||||||
|
in {< start = start; stop = stop >}
|
||||||
|
|
||||||
|
method shift_one_uchar len =
|
||||||
|
let start = start#shift_one_uchar len
|
||||||
|
and stop = stop#shift_one_uchar len
|
||||||
|
in {< start = start; stop = stop >}
|
||||||
|
|
||||||
|
(* Getters *)
|
||||||
|
|
||||||
|
method file = start#file
|
||||||
|
method pos = start, stop
|
||||||
|
method byte_pos = start#byte, stop#byte
|
||||||
|
|
||||||
|
(* Predicates *)
|
||||||
|
|
||||||
|
method is_ghost = start#is_ghost && stop#is_ghost
|
||||||
|
|
||||||
|
(* Conversions to strings *)
|
||||||
|
|
||||||
|
method to_string ?(file=true) ?(offsets=true) mode =
|
||||||
|
let horizontal = if offsets then "character" else "column"
|
||||||
|
and start_offset =
|
||||||
|
if offsets then start#offset mode else start#column mode
|
||||||
|
and stop_offset =
|
||||||
|
if offsets then stop#offset mode else stop#column mode in
|
||||||
|
let info =
|
||||||
|
if file
|
||||||
|
then sprintf "in file \"%s\", line %i, %s"
|
||||||
|
(String.escaped start#file) start#line horizontal
|
||||||
|
else sprintf "at line %i, %s" start#line horizontal
|
||||||
|
in if stop#line = start#line
|
||||||
|
then sprintf "%ss %i-%i" info start_offset stop_offset
|
||||||
|
else sprintf "%s %i to line %i, %s %i"
|
||||||
|
info start_offset stop#line horizontal stop_offset
|
||||||
|
|
||||||
|
method compact ?(file=true) ?(offsets=true) mode =
|
||||||
|
let start_str = start#anonymous ~offsets mode
|
||||||
|
and stop_str = stop#anonymous ~offsets mode in
|
||||||
|
if start#file = stop#file then
|
||||||
|
if file then sprintf "%s:%s-%s" start#file start_str stop_str
|
||||||
|
else sprintf "%s-%s" start_str stop_str
|
||||||
|
else sprintf "%s:%s-%s:%s" start#file start_str stop#file stop_str
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Special regions *)
|
||||||
|
|
||||||
|
let ghost = make ~start:Pos.ghost ~stop:Pos.ghost
|
||||||
|
|
||||||
|
let min = make ~start:Pos.min ~stop:Pos.min
|
||||||
|
|
||||||
|
(* Comparisons *)
|
||||||
|
|
||||||
|
let equal r1 r2 =
|
||||||
|
r1#file = r2#file
|
||||||
|
&& Pos.equal r1#start r2#start
|
||||||
|
&& Pos.equal r1#stop r2#stop
|
||||||
|
|
||||||
|
let lt r1 r2 =
|
||||||
|
r1#file = r2#file
|
||||||
|
&& not r1#is_ghost
|
||||||
|
&& not r2#is_ghost
|
||||||
|
&& Pos.lt r1#start r2#start
|
||||||
|
&& Pos.lt r1#stop r2#stop
|
||||||
|
|
||||||
|
let cover r1 r2 =
|
||||||
|
if r1#is_ghost
|
||||||
|
then r2
|
||||||
|
else if r2#is_ghost
|
||||||
|
then r1
|
||||||
|
else if lt r1 r2
|
||||||
|
then make ~start:r1#start ~stop:r2#stop
|
||||||
|
else make ~start:r2#start ~stop:r1#stop
|
123
Region.mli
Normal file
123
Region.mli
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
(* Regions of a file
|
||||||
|
|
||||||
|
A _region_ is a contiguous series of bytes, for example, in a text
|
||||||
|
file. It is here denoted by the object type [t].
|
||||||
|
|
||||||
|
The start (included) of the region is given by the field [start],
|
||||||
|
which is a _position_, and the end (excluded) is the position given
|
||||||
|
by the field [stop]. The convention of including the start and
|
||||||
|
excluding the end enables to have empty regions if, and only if,
|
||||||
|
[start = stop]. See module [Pos] for the definition of positions.
|
||||||
|
|
||||||
|
The first byte of a file starts at the offset zero (that is,
|
||||||
|
column one), and [start] is always lower than or equal to [stop],
|
||||||
|
and they must refer to the same file.
|
||||||
|
*)
|
||||||
|
|
||||||
|
type t = <
|
||||||
|
start : Pos.t;
|
||||||
|
stop : Pos.t;
|
||||||
|
|
||||||
|
(* Setters *)
|
||||||
|
|
||||||
|
(* The call [region#shift_bytes n] evaluates in a region that is the
|
||||||
|
translation of region [region] of [n] bytes forward in the
|
||||||
|
file. The call [region#shift_one_uchar n] is similar, except that
|
||||||
|
it assumes that [n] is the number of bytes making up one unicode
|
||||||
|
point. *)
|
||||||
|
|
||||||
|
shift_bytes : int -> t;
|
||||||
|
shift_one_uchar : int -> t;
|
||||||
|
|
||||||
|
(* Getters *)
|
||||||
|
|
||||||
|
(* The method [file] returns the file name.
|
||||||
|
The method [pos] returns the values of the fields [start] and [stop].
|
||||||
|
The method [byte_pos] returns the start and end positions of the
|
||||||
|
region at hand _interpreting them as lexing positions_, that is,
|
||||||
|
the unit is the byte. *)
|
||||||
|
|
||||||
|
file : string;
|
||||||
|
pos : Pos.t * Pos.t;
|
||||||
|
byte_pos : Lexing.position * Lexing.position;
|
||||||
|
|
||||||
|
(* Predicates *)
|
||||||
|
|
||||||
|
is_ghost : bool;
|
||||||
|
|
||||||
|
(* Conversions to [string] *)
|
||||||
|
|
||||||
|
(* The call [region#to_string ~file ~offsets mode] evaluates in a
|
||||||
|
string denoting the region [region].
|
||||||
|
|
||||||
|
The name of the file is present if, and only if, [file = true] or
|
||||||
|
[file] is missing.
|
||||||
|
|
||||||
|
The positions in the file are expressed horizontal offsets if
|
||||||
|
[offsets = true] or [offsets] is missing (the default), otherwise
|
||||||
|
as columns.
|
||||||
|
|
||||||
|
If [mode = `Byte], those positions will be assumed to have bytes
|
||||||
|
as their unit, otherwise, if [mode = `Point], they will be
|
||||||
|
assumed to refer to code points.
|
||||||
|
|
||||||
|
The method [compact] has the same signature and calling
|
||||||
|
convention as [to_string], except that the resulting string is
|
||||||
|
more compact.
|
||||||
|
*)
|
||||||
|
|
||||||
|
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
|
||||||
|
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||||
|
>
|
||||||
|
|
||||||
|
type region = t
|
||||||
|
|
||||||
|
type 'a reg = {region: t; value: 'a}
|
||||||
|
|
||||||
|
(* Constructors *)
|
||||||
|
|
||||||
|
(* The function [make] creates a region from two positions. If the
|
||||||
|
positions are not properly ordered or refer to different files, the
|
||||||
|
exception [Invalid] is raised. *)
|
||||||
|
|
||||||
|
exception Invalid
|
||||||
|
|
||||||
|
val make : start:Pos.t -> stop:Pos.t -> t
|
||||||
|
|
||||||
|
(* Special regions *)
|
||||||
|
|
||||||
|
(* To deal with ghost expressions, that is, pieces of abstract syntax
|
||||||
|
that have not been built from excerpts of concrete syntax, we need
|
||||||
|
_ghost regions_. The module [Pos] provides a [ghost] position, and
|
||||||
|
we also provide a [ghost] region and, in type [t], the method
|
||||||
|
[is_ghost] to check it. *)
|
||||||
|
|
||||||
|
val ghost : t (* Two [Pos.ghost] positions *)
|
||||||
|
|
||||||
|
(* Occasionnally, we may need a minimum region. It is here made of two
|
||||||
|
minimal positions. *)
|
||||||
|
|
||||||
|
val min : t (* Two [Pos.min] positions *)
|
||||||
|
|
||||||
|
(* Comparisons *)
|
||||||
|
|
||||||
|
(* Two regions are equal if, and only if, they refer to the same file
|
||||||
|
and their start positions are equal and their stop positions are
|
||||||
|
equal. See [Pos.equal]. Note that [r1] and [r2] can be ghosts. *)
|
||||||
|
|
||||||
|
val equal : t -> t -> bool
|
||||||
|
|
||||||
|
(* The call [lt r1 r2] ("lower than") has the value [true] if, and
|
||||||
|
only if, regions [r1] and [r2] refer to the same file, none is a
|
||||||
|
ghost and the start position of [r1] is lower than that of
|
||||||
|
[r2]. (See [Pos.lt].) *)
|
||||||
|
|
||||||
|
val lt : t -> t -> bool
|
||||||
|
|
||||||
|
(* Given two regions [r1] and [r2], we may want the region [cover r1
|
||||||
|
r2] that covers [r1] and [r2]. We property [equal (cover r1 r2)
|
||||||
|
(cover r2 r1)]. (In a sense, it is the maximum region, but we avoid
|
||||||
|
that name because of the [min] function above.) If [r1] is a ghost,
|
||||||
|
the cover is [r2], and if [r2] is a ghost, the cover is [r1]. *)
|
||||||
|
|
||||||
|
val cover : t -> t -> t
|
27
Tests/a.li
Normal file
27
Tests/a.li
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
type t is int * string
|
||||||
|
type u is t
|
||||||
|
type v is record foo: key; bar: mutez; baz: address end
|
||||||
|
type w is K of v * u
|
||||||
|
|
||||||
|
parameter p : v
|
||||||
|
storage w
|
||||||
|
operations u
|
||||||
|
|
||||||
|
function f (const x : int) : int is
|
||||||
|
var y : int := 5 - x;
|
||||||
|
const z : int = 6
|
||||||
|
begin
|
||||||
|
y := x + y
|
||||||
|
end with y * 2
|
||||||
|
|
||||||
|
procedure g (const l : list (int)) is
|
||||||
|
begin
|
||||||
|
match l with
|
||||||
|
[] -> null
|
||||||
|
| h<:t -> q (h+2)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
begin
|
||||||
|
g (Unit)
|
||||||
|
end
|
157
Utils.ml
Normal file
157
Utils.ml
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
(* Utility types and functions *)
|
||||||
|
|
||||||
|
(* Identity *)
|
||||||
|
|
||||||
|
let id x = x
|
||||||
|
|
||||||
|
(* Combinators *)
|
||||||
|
|
||||||
|
let (<@) f g x = f (g x)
|
||||||
|
|
||||||
|
let swap f x y = f y x
|
||||||
|
|
||||||
|
let lambda = fun x _ -> x
|
||||||
|
|
||||||
|
let curry f x y = f (x,y)
|
||||||
|
let uncurry f (x,y) = f x y
|
||||||
|
|
||||||
|
(* Parametric rules for sequences *)
|
||||||
|
|
||||||
|
type 'a nseq = 'a * 'a list
|
||||||
|
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
|
||||||
|
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
|
||||||
|
|
||||||
|
(* Consing *)
|
||||||
|
|
||||||
|
let nseq_cons x (hd,tl) = x, hd::tl
|
||||||
|
let nsepseq_cons x sep (hd,tl) = x, (sep,hd)::tl
|
||||||
|
|
||||||
|
let sepseq_cons x sep = function
|
||||||
|
None -> x, []
|
||||||
|
| Some (hd,tl) -> x, (sep,hd)::tl
|
||||||
|
|
||||||
|
(* Rightwards iterators *)
|
||||||
|
|
||||||
|
let nseq_foldl f a (hd,tl) = List.fold_left f a (hd::tl)
|
||||||
|
|
||||||
|
let nsepseq_foldl f a (hd,tl) =
|
||||||
|
List.fold_left (fun a (_,e) -> f a e) (f a hd) tl
|
||||||
|
|
||||||
|
let sepseq_foldl f a = function
|
||||||
|
None -> a
|
||||||
|
| Some s -> nsepseq_foldl f a s
|
||||||
|
|
||||||
|
let nseq_iter f (hd,tl) = List.iter f (hd::tl)
|
||||||
|
|
||||||
|
let nsepseq_iter f (hd,tl) = f hd; List.iter (f <@ snd) tl
|
||||||
|
|
||||||
|
let sepseq_iter f = function
|
||||||
|
None -> ()
|
||||||
|
| Some s -> nsepseq_iter f s
|
||||||
|
|
||||||
|
(* Reversing *)
|
||||||
|
|
||||||
|
let nseq_rev (hd,tl) =
|
||||||
|
let rec aux acc = function
|
||||||
|
[] -> acc
|
||||||
|
| x::l -> aux (nseq_cons x acc) l
|
||||||
|
in aux (hd,[]) tl
|
||||||
|
|
||||||
|
let nsepseq_rev =
|
||||||
|
let rec aux acc = function
|
||||||
|
hd, (sep,snd)::tl -> aux ((sep,hd)::acc) (snd,tl)
|
||||||
|
| hd, [] -> hd, acc in
|
||||||
|
function
|
||||||
|
hd, (sep,snd)::tl -> aux [sep,hd] (snd,tl)
|
||||||
|
| s -> s
|
||||||
|
|
||||||
|
let sepseq_rev = function
|
||||||
|
None -> None
|
||||||
|
| Some seq -> Some (nsepseq_rev seq)
|
||||||
|
|
||||||
|
(* Leftwards iterators *)
|
||||||
|
|
||||||
|
let nseq_foldr f (hd,tl) = List.fold_right f (hd::tl)
|
||||||
|
|
||||||
|
let nsepseq_foldr f (hd,tl) a = f hd (List.fold_right (f <@ snd) tl a)
|
||||||
|
|
||||||
|
let sepseq_foldr f = function
|
||||||
|
None -> fun a -> a
|
||||||
|
| Some s -> nsepseq_foldr f s
|
||||||
|
|
||||||
|
(* Conversions to lists *)
|
||||||
|
|
||||||
|
let nseq_to_list (x,y) = x::y
|
||||||
|
|
||||||
|
let nsepseq_to_list (x,y) = x :: List.map snd y
|
||||||
|
|
||||||
|
let sepseq_to_list = function
|
||||||
|
None -> []
|
||||||
|
| Some s -> nsepseq_to_list s
|
||||||
|
|
||||||
|
(* Optional values *)
|
||||||
|
|
||||||
|
module Option =
|
||||||
|
struct
|
||||||
|
let apply f x =
|
||||||
|
match x with
|
||||||
|
Some y -> Some (f y)
|
||||||
|
| None -> None
|
||||||
|
|
||||||
|
let rev_apply x y =
|
||||||
|
match x with
|
||||||
|
Some f -> f y
|
||||||
|
| None -> y
|
||||||
|
|
||||||
|
let to_string = function
|
||||||
|
Some x -> x
|
||||||
|
| None -> ""
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Modules based on [String], like sets and maps. *)
|
||||||
|
|
||||||
|
module String =
|
||||||
|
struct
|
||||||
|
include String
|
||||||
|
|
||||||
|
module Ord =
|
||||||
|
struct
|
||||||
|
type nonrec t = t
|
||||||
|
let compare = compare
|
||||||
|
end
|
||||||
|
|
||||||
|
module Map = Map.Make (Ord)
|
||||||
|
module Set = Set.Make (Ord)
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Integers *)
|
||||||
|
|
||||||
|
module Int =
|
||||||
|
struct
|
||||||
|
type t = int
|
||||||
|
|
||||||
|
module Ord =
|
||||||
|
struct
|
||||||
|
type nonrec t = t
|
||||||
|
let compare = compare
|
||||||
|
end
|
||||||
|
|
||||||
|
module Map = Map.Make (Ord)
|
||||||
|
module Set = Set.Make (Ord)
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Effectful symbol generator *)
|
||||||
|
|
||||||
|
let gen_sym =
|
||||||
|
let counter = ref 0 in
|
||||||
|
fun () -> incr counter; "v" ^ string_of_int !counter
|
||||||
|
|
||||||
|
(* General tracing function *)
|
||||||
|
|
||||||
|
let trace text = function
|
||||||
|
None -> ()
|
||||||
|
| Some chan -> output_string chan text; flush chan
|
||||||
|
|
||||||
|
(* Printing a string in red to standard error *)
|
||||||
|
|
||||||
|
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
97
Utils.mli
Normal file
97
Utils.mli
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
(* Utility types and functions *)
|
||||||
|
|
||||||
|
(* Polymorphic identity function *)
|
||||||
|
|
||||||
|
val id : 'a -> 'a
|
||||||
|
|
||||||
|
(* Combinators *)
|
||||||
|
|
||||||
|
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
|
||||||
|
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
|
||||||
|
val lambda : 'a -> 'b -> 'a
|
||||||
|
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
|
||||||
|
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
|
||||||
|
|
||||||
|
(* Parametric rules for sequences
|
||||||
|
|
||||||
|
nseq: non-empty sequence;
|
||||||
|
sepseq: (possibly empty) sequence of separated items;
|
||||||
|
nsepseq: non-empty sequence of separated items.
|
||||||
|
*)
|
||||||
|
|
||||||
|
type 'a nseq = 'a * 'a list
|
||||||
|
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
|
||||||
|
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
|
||||||
|
|
||||||
|
(* Consing *)
|
||||||
|
|
||||||
|
val nseq_cons : 'a -> 'a nseq -> 'a nseq
|
||||||
|
val nsepseq_cons : 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
|
||||||
|
val sepseq_cons : 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
|
||||||
|
|
||||||
|
(* Reversing *)
|
||||||
|
|
||||||
|
val nseq_rev: 'a nseq -> 'a nseq
|
||||||
|
val nsepseq_rev: ('a,'sep) nsepseq -> ('a,'sep) nsepseq
|
||||||
|
val sepseq_rev: ('a,'sep) sepseq -> ('a,'sep) sepseq
|
||||||
|
|
||||||
|
(* Rightwards iterators *)
|
||||||
|
|
||||||
|
val nseq_foldl : ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
|
||||||
|
val nsepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
|
||||||
|
val sepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
|
||||||
|
|
||||||
|
val nseq_iter : ('a -> unit) -> 'a nseq -> unit
|
||||||
|
val nsepseq_iter : ('a -> unit) -> ('a,'b) nsepseq -> unit
|
||||||
|
val sepseq_iter : ('a -> unit) -> ('a,'b) sepseq -> unit
|
||||||
|
|
||||||
|
(* Leftwards iterators *)
|
||||||
|
|
||||||
|
val nseq_foldr : ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
|
||||||
|
val nsepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
|
||||||
|
val sepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
|
||||||
|
|
||||||
|
(* Conversions to lists *)
|
||||||
|
|
||||||
|
val nseq_to_list : 'a nseq -> 'a list
|
||||||
|
val nsepseq_to_list : ('a,'b) nsepseq -> 'a list
|
||||||
|
val sepseq_to_list : ('a,'b) sepseq -> 'a list
|
||||||
|
|
||||||
|
(* Effectful symbol generator *)
|
||||||
|
|
||||||
|
val gen_sym : unit -> string
|
||||||
|
|
||||||
|
(* General tracing function *)
|
||||||
|
|
||||||
|
val trace : string -> out_channel option -> unit
|
||||||
|
|
||||||
|
(* Printing a string in red to standard error *)
|
||||||
|
|
||||||
|
val highlight : string -> unit
|
||||||
|
|
||||||
|
(* Working with optional values *)
|
||||||
|
|
||||||
|
module Option :
|
||||||
|
sig
|
||||||
|
val apply : ('a -> 'b) -> 'a option -> 'b option
|
||||||
|
val rev_apply : ('a -> 'a) option -> 'a -> 'a
|
||||||
|
val to_string : string option -> string
|
||||||
|
end
|
||||||
|
|
||||||
|
(* An extension to the standard module [String] *)
|
||||||
|
|
||||||
|
module String :
|
||||||
|
sig
|
||||||
|
include module type of String
|
||||||
|
module Map : Map.S with type key = t
|
||||||
|
module Set : Set.S with type elt = t
|
||||||
|
end
|
||||||
|
|
||||||
|
(* Integer maps *)
|
||||||
|
|
||||||
|
module Int :
|
||||||
|
sig
|
||||||
|
type t = int
|
||||||
|
module Map : Map.S with type key = t
|
||||||
|
module Set : Set.S with type elt = t
|
||||||
|
end
|
1
Version.ml
Normal file
1
Version.ml
Normal file
@ -0,0 +1 @@
|
|||||||
|
let version = "7445e9c0"
|
38
dune
Normal file
38
dune
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
(ocamllex LexToken)
|
||||||
|
(ocamllex Lexer)
|
||||||
|
|
||||||
|
(menhir
|
||||||
|
(merge_into Parser)
|
||||||
|
(modules ParToken Parser)
|
||||||
|
(flags -la 1 --explain --external-tokens LexToken)
|
||||||
|
)
|
||||||
|
|
||||||
|
(executables
|
||||||
|
(names LexerMain ParserMain)
|
||||||
|
(public_names ligo-lexer ligo-parser)
|
||||||
|
(package ligo-parser)
|
||||||
|
(modules_without_implementation
|
||||||
|
Error
|
||||||
|
)
|
||||||
|
(libraries
|
||||||
|
hex
|
||||||
|
zarith
|
||||||
|
getopt
|
||||||
|
uutf
|
||||||
|
str
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
(rule
|
||||||
|
(targets Parser.exe)
|
||||||
|
(deps ParserMain.exe)
|
||||||
|
(action (copy ParserMain.exe Parser.exe))
|
||||||
|
(mode promote-until-clean)
|
||||||
|
)
|
||||||
|
|
||||||
|
(rule
|
||||||
|
(targets Lexer.exe)
|
||||||
|
(deps LexerMain.exe)
|
||||||
|
(action (copy LexerMain.exe Lexer.exe))
|
||||||
|
(mode promote-until-clean)
|
||||||
|
)
|
2
dune-project
Normal file
2
dune-project
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
(lang dune 1.7)
|
||||||
|
(using menhir 2.0)
|
19
ligo-parser.opam
Normal file
19
ligo-parser.opam
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
opam-version: "2.0"
|
||||||
|
version: "1.0"
|
||||||
|
maintainer: "gabriel.alfour@gmail.com"
|
||||||
|
authors: [ "Galfour" ]
|
||||||
|
homepage: "https://gitlab.com/gabriel.alfour/ligo-parser"
|
||||||
|
bug-reports: "https://gitlab.com/gabriel.alfour/ligo-parser/issues"
|
||||||
|
dev-repo: "git+https://gitlab.com/gabriel.alfour/ligo-parser.git"
|
||||||
|
license: "MIT"
|
||||||
|
depends: [
|
||||||
|
"dune"
|
||||||
|
"menhir"
|
||||||
|
"hex" "zarith" "getopt" "uutf"
|
||||||
|
]
|
||||||
|
build: [
|
||||||
|
[ "dune" "build" "-p" name "-j" jobs ]
|
||||||
|
]
|
||||||
|
url {
|
||||||
|
src: "https://gitlab.com/gabriel.alfour/ligo-parser/-/archive/master/ligo-parser.tar.gz"
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user