initial commit

Galfour 2019-02-25 21:29:29 +00:00
commit 9df0f6ad3a
36 changed files with 4958 additions and 0 deletions

.Lexer.ml.tag Normal file

@@ -0,0 +1 @@
ocamlc: -w -42

.LexerMain.tag Normal file (empty)

.Parser.mly.tag Normal file

@@ -0,0 +1 @@
--explain --external-tokens LexToken --base Parser ParToken.mly

.ParserMain.tag Normal file (empty)

.gitignore vendored Normal file

@@ -0,0 +1,6 @@
_build/*
*/_build
*~
.merlin
*/.merlin
*.install

.links Normal file

@@ -0,0 +1,2 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg

AST.ml Normal file

@@ -0,0 +1,916 @@
(* Abstract Syntax Tree (AST) for Ligo *)
open Utils
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
type 'a reg = 'a Region.reg
let rec last to_region = function
[] -> Region.ghost
| [x] -> to_region x
| _::t -> last to_region t
let nseq_to_region to_region (hd,tl) =
Region.cover (to_region hd) (last to_region tl)
let nsepseq_to_region to_region (hd,tl) =
let reg (_,item) = to_region item in
Region.cover (to_region hd) (last reg tl)
let sepseq_to_region to_region = function
None -> Region.ghost
| Some seq -> nsepseq_to_region to_region seq
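(* Example (a sketch): for a separated sequence standing for the
   source text "a, b, c", the covering region runs from the first
   item to the last:
     nsepseq_to_region to_region (a, [comma1, b; comma2, c])
     = Region.cover (to_region a) (to_region c)
*)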
(* Keywords of Ligo *)
type kwd_begin = Region.t
type kwd_const = Region.t
type kwd_down = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_is = Region.t
type kwd_for = Region.t
type kwd_function = Region.t
type kwd_parameter = Region.t
type kwd_storage = Region.t
type kwd_type = Region.t
type kwd_of = Region.t
type kwd_operations = Region.t
type kwd_var = Region.t
type kwd_end = Region.t
type kwd_then = Region.t
type kwd_else = Region.t
type kwd_match = Region.t
type kwd_procedure = Region.t
type kwd_null = Region.t
type kwd_record = Region.t
type kwd_step = Region.t
type kwd_to = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_while = Region.t
type kwd_with = Region.t
(* Data constructors *)
type c_False = Region.t
type c_None = Region.t
type c_Some = Region.t
type c_True = Region.t
type c_Unit = Region.t
(* Symbols *)
type semi = Region.t
type comma = Region.t
type lpar = Region.t
type rpar = Region.t
type lbrace = Region.t
type rbrace = Region.t
type lbracket = Region.t
type rbracket = Region.t
type cons = Region.t
type vbar = Region.t
type arrow = Region.t
type asgnmnt = Region.t
type equal = Region.t
type colon = Region.t
type bool_or = Region.t
type bool_and = Region.t
type lt = Region.t
type leq = Region.t
type gt = Region.t
type geq = Region.t
type neq = Region.t
type plus = Region.t
type minus = Region.t
type slash = Region.t
type times = Region.t
type dot = Region.t
type wild = Region.t
type cat = Region.t
(* Virtual tokens *)
type eof = Region.t
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type map_name = string reg
type constr = string reg
(* Comma-separated non-empty lists *)
type 'a csv = ('a, comma) nsepseq
(* Bar-separated non-empty lists *)
type 'a bsv = ('a, vbar) nsepseq
(* Parentheses *)
type 'a par = (lpar * 'a * rpar) reg
(* Brackets compounds *)
type 'a brackets = (lbracket * 'a * rbracket) reg
(* Braced compounds *)
type 'a braces = (lbrace * 'a * rbrace) reg
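(* For instance (a sketch), a value of type [expr par] denotes a
   parenthesised expression "(e)" and carries the regions of "(" and
   ")" as well as, through [reg], the region of the whole. *)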
(* The Abstract Syntax Tree *)
type t = <
types : type_decl list;
parameter : parameter_decl;
storage : storage_decl;
operations : operations_decl;
lambdas : lambda_decl list;
block : block reg;
eof : eof
>
and ast = t
and parameter_decl = (kwd_parameter * variable * colon * type_expr) reg
and storage_decl = (kwd_storage * type_expr) reg
and operations_decl = (kwd_operations * type_expr) reg
(* Type declarations *)
and type_decl = (kwd_type * type_name * kwd_is * type_expr) reg
and type_expr =
Prod of cartesian
| Sum of (variant, vbar) nsepseq reg
| Record of record_type
| TypeApp of (type_name * type_tuple) reg
| ParType of type_expr par
| TAlias of variable
and cartesian = (type_expr, times) nsepseq reg
and variant = (constr * kwd_of * cartesian) reg
and record_type = (kwd_record * field_decls * kwd_end) reg
and field_decls = (field_decl, semi) nsepseq
and field_decl = (variable * colon * type_expr) reg
and type_tuple = (type_name, comma) nsepseq par
(* Function and procedure declarations *)
and lambda_decl =
FunDecl of fun_decl reg
| ProcDecl of proc_decl reg
and fun_decl = <
kwd_function : kwd_function;
var : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
body : block reg;
kwd_with : kwd_with;
return : expr
>
and proc_decl = <
kwd_procedure : kwd_procedure;
var : variable;
param : parameters;
kwd_is : kwd_is;
body : block reg
>
and parameters = (param_decl, semi) nsepseq par
and param_decl = (var_kind * variable * colon * type_expr) reg
and var_kind =
Mutable of kwd_var
| Const of kwd_const
and block = <
decls : value_decls;
opening : kwd_begin;
instr : instructions;
close : kwd_end
>
and value_decls = (var_decl reg, semi) sepseq reg
and var_decl = <
kind : var_kind;
var : variable;
colon : colon;
vtype : type_expr;
setter : Region.t; (* "=" or ":=" *)
init : expr
>
and instructions = (instruction, semi) nsepseq reg
and instruction =
Single of single_instr
| Block of block reg
and single_instr =
Cond of conditional reg
| Match of match_instr reg
| Asgnmnt of asgnmnt_instr
| Loop of loop
| ProcCall of fun_call
| Null of kwd_null
and conditional = <
kwd_if : kwd_if;
test : expr;
kwd_then : kwd_then;
ifso : instruction;
kwd_else : kwd_else;
ifnot : instruction
>
and match_instr = <
kwd_match : kwd_match;
expr : expr;
kwd_with : kwd_with;
cases : cases;
kwd_end : kwd_end
>
and cases = (case, vbar) nsepseq reg
and case = (pattern * arrow * instruction) reg
and asgnmnt_instr = (variable * asgnmnt * expr) reg
and loop =
While of while_loop
| For of for_loop
and while_loop = (kwd_while * expr * block reg) reg
and for_loop =
ForInt of for_int reg
| ForCollect of for_collect reg
and for_int = <
kwd_for : kwd_for;
asgnmnt : asgnmnt_instr;
down : kwd_down option;
kwd_to : kwd_to;
bound : expr;
step : (kwd_step * expr) option;
block : block reg
>
and for_collect = <
kwd_for : kwd_for;
var : variable;
bind_to : (arrow * variable) option;
kwd_in : kwd_in;
expr : expr;
block : block reg
>
(* Expressions *)
and expr =
Or of (expr * bool_or * expr) reg
| And of (expr * bool_and * expr) reg
| Lt of (expr * lt * expr) reg
| Leq of (expr * leq * expr) reg
| Gt of (expr * gt * expr) reg
| Geq of (expr * geq * expr) reg
| Equal of (expr * equal * expr) reg
| Neq of (expr * neq * expr) reg
| Cat of (expr * cat * expr) reg
| Cons of (expr * cons * expr) reg
| Add of (expr * plus * expr) reg
| Sub of (expr * minus * expr) reg
| Mult of (expr * times * expr) reg
| Div of (expr * slash * expr) reg
| Mod of (expr * kwd_mod * expr) reg
| Neg of (minus * expr) reg
| Not of (kwd_not * expr) reg
| Int of (Lexer.lexeme * Z.t) reg
| Var of Lexer.lexeme reg
| String of Lexer.lexeme reg
| Bytes of (Lexer.lexeme * MBytes.t) reg
| False of c_False
| True of c_True
| Unit of c_Unit
| Tuple of tuple
| List of (expr, comma) nsepseq brackets
| EmptyList of empty_list
| Set of (expr, comma) nsepseq braces
| EmptySet of empty_set
| NoneExpr of none_expr
| FunCall of fun_call
| ConstrApp of constr_app
| SomeApp of (c_Some * arguments) reg
| MapLookUp of map_lookup reg
| ParExpr of expr par
and tuple = (expr, comma) nsepseq par
and empty_list =
(lbracket * rbracket * colon * type_expr) par
and empty_set =
(lbrace * rbrace * colon * type_expr) par
and none_expr =
(c_None * colon * type_expr) par
and fun_call = (fun_name * arguments) reg
and arguments = tuple
and constr_app = (constr * arguments) reg
and map_lookup = <
map_name : variable;
selector : dot;
index : expr brackets
>
(* Patterns *)
and pattern = (core_pattern, cons) nsepseq reg
and core_pattern =
PVar of Lexer.lexeme reg
| PWild of wild
| PInt of (Lexer.lexeme * Z.t) reg
| PBytes of (Lexer.lexeme * MBytes.t) reg
| PString of Lexer.lexeme reg
| PUnit of c_Unit
| PFalse of c_False
| PTrue of c_True
| PNone of c_None
| PSome of (c_Some * core_pattern par) reg
| PList of list_pattern
| PTuple of (core_pattern, comma) nsepseq par
and list_pattern =
Sugar of (core_pattern, comma) sepseq brackets
| Raw of (core_pattern * cons * pattern) par
(* Projecting regions *)
open Region
let type_expr_to_region = function
Prod node -> node.region
| Sum node -> node.region
| Record node -> node.region
| TypeApp node -> node.region
| ParType node -> node.region
| TAlias node -> node.region
let expr_to_region = function
Or {region; _}
| And {region; _}
| Lt {region; _}
| Leq {region; _}
| Gt {region; _}
| Geq {region; _}
| Equal {region; _}
| Neq {region; _}
| Cat {region; _}
| Cons {region; _}
| Add {region; _}
| Sub {region; _}
| Mult {region; _}
| Div {region; _}
| Mod {region; _}
| Neg {region; _}
| Not {region; _}
| Int {region; _}
| Var {region; _}
| String {region; _}
| Bytes {region; _}
| False region
| True region
| Unit region
| Tuple {region; _}
| List {region; _}
| EmptyList {region; _}
| Set {region; _}
| EmptySet {region; _}
| NoneExpr {region; _}
| FunCall {region; _}
| ConstrApp {region; _}
| SomeApp {region; _}
| MapLookUp {region; _}
| ParExpr {region; _} -> region
let var_kind_to_region = function
Mutable region
| Const region -> region
let instr_to_region = function
Single (Cond {region; _})
| Single (Match {region; _})
| Single (Asgnmnt {region; _})
| Single (Loop (While {region; _}))
| Single (Loop (For (ForInt {region; _})))
| Single (Loop (For (ForCollect {region; _})))
| Single (ProcCall {region; _})
| Single (Null region)
| Block {region; _} -> region
let core_pattern_to_region = function
PVar {region; _}
| PWild region
| PInt {region; _}
| PBytes {region; _}
| PString {region; _}
| PUnit region
| PFalse region
| PTrue region
| PNone region
| PSome {region; _}
| PList (Sugar {region; _})
| PList (Raw {region; _})
| PTuple {region; _} -> region
(* Printing the tokens with their source regions *)
let printf = Printf.printf
let compact (region: Region.t) =
region#compact ~offsets:EvalOpt.offsets EvalOpt.mode
let print_nsepseq sep print (head,tail) =
let print_aux (sep_reg, item) =
printf "%s: %s\n" (compact sep_reg) sep;
print item
in print head; List.iter print_aux tail
let print_sepseq sep print = function
None -> ()
| Some seq -> print_nsepseq sep print seq
let print_token region lexeme =
printf "%s: %s\n"(compact region) lexeme
let print_var {region; value=lexeme} =
printf "%s: Ident \"%s\"\n" (compact region) lexeme
let print_constr {region; value=lexeme} =
printf "%s: Constr \"%s\"\n"
(compact region) lexeme
let print_string {region; value=lexeme} =
printf "%s: String \"%s\"\n"
(compact region) lexeme
let print_bytes {region; value = lexeme, abstract} =
printf "%s: Bytes (\"%s\", \"0x%s\")\n"
(compact region) lexeme
(MBytes.to_hex abstract |> Hex.to_string)
let print_int {region; value = lexeme, abstract} =
printf "%s: Int (\"%s\", %s)\n"
(compact region) lexeme
(Z.to_string abstract)
let rec print_tokens ast =
List.iter print_type_decl ast#types;
print_parameter_decl ast#parameter;
print_storage_decl ast#storage;
print_operations_decl ast#operations;
List.iter print_lambda_decl ast#lambdas;
print_block ast#block;
print_token ast#eof "EOF"
and print_parameter_decl {value=node; _} =
let kwd_parameter, variable, colon, type_expr = node in
print_token kwd_parameter "parameter";
print_var variable;
print_token colon ":";
print_type_expr type_expr
and print_storage_decl {value=node; _} =
let kwd_storage, type_expr = node in
print_token kwd_storage "storage";
print_type_expr type_expr
and print_operations_decl {value=node; _} =
let kwd_operations, type_expr = node in
print_token kwd_operations "operations";
print_type_expr type_expr
and print_type_decl {value=node; _} =
let kwd_type, type_name, kwd_is, type_expr = node in
print_token kwd_type "type";
print_var type_name;
print_token kwd_is "is";
print_type_expr type_expr
and print_type_expr = function
Prod cartesian -> print_cartesian cartesian
| Sum sum_type -> print_sum_type sum_type
| Record record_type -> print_record_type record_type
| TypeApp type_app -> print_type_app type_app
| ParType par_type -> print_par_type par_type
| TAlias type_alias -> print_var type_alias
and print_cartesian {value=sequence; _} =
print_nsepseq "*" print_type_expr sequence
and print_variant {value=node; _} =
let constr, kwd_of, cartesian = node in
print_constr constr;
print_token kwd_of "of";
print_cartesian cartesian
and print_sum_type {value=sequence; _} =
print_nsepseq "|" print_variant sequence
and print_record_type {value=node; _} =
let kwd_record, field_decls, kwd_end = node in
print_token kwd_record "record";
print_field_decls field_decls;
print_token kwd_end "end"
and print_type_app {value=node; _} =
let type_name, type_tuple = node in
print_var type_name;
print_type_tuple type_tuple
and print_par_type {value=node; _} =
let lpar, type_expr, rpar = node in
print_token lpar "(";
print_type_expr type_expr;
print_token rpar ")"
and print_field_decls sequence =
print_nsepseq ";" print_field_decl sequence
and print_field_decl {value=node; _} =
let var, colon, type_expr = node in
print_var var;
print_token colon ":";
print_type_expr type_expr
and print_type_tuple {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq "," print_var sequence;
print_token rpar ")"
and print_lambda_decl = function
FunDecl fun_decl -> print_fun_decl fun_decl
| ProcDecl proc_decl -> print_proc_decl proc_decl
and print_fun_decl {value=node; _} =
print_token node#kwd_function "function";
print_var node#var;
print_parameters node#param;
print_token node#colon ":";
print_type_expr node#ret_type;
print_token node#kwd_is "is";
print_block node#body;
print_token node#kwd_with "with";
print_expr node#return
and print_proc_decl {value=node; _} =
print_token node#kwd_procedure "procedure";
print_var node#var;
print_parameters node#param;
print_token node#kwd_is "is";
print_block node#body
and print_parameters {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq ";" print_param_decl sequence;
print_token rpar ")"
and print_param_decl {value=node; _} =
let var_kind, variable, colon, type_expr = node in
print_var_kind var_kind;
print_var variable;
print_token colon ":";
print_type_expr type_expr
and print_var_kind = function
Mutable kwd_var -> print_token kwd_var "var"
| Const kwd_const -> print_token kwd_const "const"
and print_block {value=node; _} =
print_value_decls node#decls;
print_token node#opening "begin";
print_instructions node#instr;
print_token node#close "end"
and print_value_decls {value=sequence; _} =
print_sepseq ";" print_var_decl sequence
and print_var_decl {value=node; _} =
let setter =
match node#kind with
Mutable _ -> ":="
| Const _ -> "=" in
print_var_kind node#kind;
print_var node#var;
print_token node#colon ":";
print_type_expr node#vtype;
print_token node#setter setter;
print_expr node#init
and print_instructions {value=sequence; _} =
print_nsepseq ";" print_instruction sequence
and print_instruction = function
Single instr -> print_single_instr instr
| Block block -> print_block block
and print_single_instr = function
Cond {value; _} -> print_conditional value
| Match {value; _} -> print_match_instr value
| Asgnmnt instr -> print_asgnmnt_instr instr
| Loop loop -> print_loop loop
| ProcCall fun_call -> print_fun_call fun_call
| Null kwd_null -> print_token kwd_null "null"
and print_conditional node =
print_token node#kwd_if "if";
print_expr node#test;
print_token node#kwd_then "then";
print_instruction node#ifso;
print_token node#kwd_else "else";
print_instruction node#ifnot
and print_match_instr node =
print_token node#kwd_match "match";
print_expr node#expr;
print_token node#kwd_with "with";
print_cases node#cases;
print_token node#kwd_end "end"
and print_cases {value=sequence; _} =
print_nsepseq "|" print_case sequence
and print_case {value=node; _} =
let pattern, arrow, instruction = node in
print_pattern pattern;
print_token arrow "->";
print_instruction instruction
and print_asgnmnt_instr {value=node; _} =
let variable, asgnmnt, expr = node in
print_var variable;
print_token asgnmnt ":=";
print_expr expr
and print_loop = function
While while_loop -> print_while_loop while_loop
| For for_loop -> print_for_loop for_loop
and print_while_loop {value=node; _} =
let kwd_while, expr, block = node in
print_token kwd_while "while";
print_expr expr;
print_block block
and print_for_loop = function
ForInt for_int -> print_for_int for_int
| ForCollect for_collect -> print_for_collect for_collect
and print_for_int {value=node; _} =
print_token node#kwd_for "for";
print_asgnmnt_instr node#asgnmnt;
print_down node#down;
print_token node#kwd_to "to";
print_expr node#bound;
print_step node#step;
print_block node#block
and print_down = function
Some kwd_down -> print_token kwd_down "down"
| None -> ()
and print_step = function
Some (kwd_step, expr) ->
print_token kwd_step "step";
print_expr expr
| None -> ()
and print_for_collect {value=node; _} =
print_token node#kwd_for "for";
print_var node#var;
print_bind_to node#bind_to;
print_token node#kwd_in "in";
print_expr node#expr;
print_block node#block
and print_bind_to = function
Some (arrow, variable) ->
print_token arrow "->";
print_var variable
| None -> ()
and print_expr = function
Or {value = expr1, bool_or, expr2; _} ->
print_expr expr1; print_token bool_or "||"; print_expr expr2
| And {value = expr1, bool_and, expr2; _} ->
print_expr expr1; print_token bool_and "&&"; print_expr expr2
| Lt {value = expr1, lt, expr2; _} ->
print_expr expr1; print_token lt "<"; print_expr expr2
| Leq {value = expr1, leq, expr2; _} ->
print_expr expr1; print_token leq "<="; print_expr expr2
| Gt {value = expr1, gt, expr2; _} ->
print_expr expr1; print_token gt ">"; print_expr expr2
| Geq {value = expr1, geq, expr2; _} ->
print_expr expr1; print_token geq ">="; print_expr expr2
| Equal {value = expr1, equal, expr2; _} ->
print_expr expr1; print_token equal "="; print_expr expr2
| Neq {value = expr1, neq, expr2; _} ->
print_expr expr1; print_token neq "=/="; print_expr expr2
| Cat {value = expr1, cat, expr2; _} ->
print_expr expr1; print_token cat "^"; print_expr expr2
| Cons {value = expr1, cons, expr2; _} ->
print_expr expr1; print_token cons "<:"; print_expr expr2
| Add {value = expr1, add, expr2; _} ->
print_expr expr1; print_token add "+"; print_expr expr2
| Sub {value = expr1, sub, expr2; _} ->
print_expr expr1; print_token sub "-"; print_expr expr2
| Mult {value = expr1, mult, expr2; _} ->
print_expr expr1; print_token mult "*"; print_expr expr2
| Div {value = expr1, div, expr2; _} ->
print_expr expr1; print_token div "/"; print_expr expr2
| Mod {value = expr1, kwd_mod, expr2; _} ->
print_expr expr1; print_token kwd_mod "mod"; print_expr expr2
| Neg {value = minus, expr; _} ->
print_token minus "-"; print_expr expr
| Not {value = kwd_not, expr; _} ->
print_token kwd_not "not"; print_expr expr
| Int i -> print_int i
| Var v -> print_var v
| String s -> print_string s
| Bytes b -> print_bytes b
| False region -> print_token region "False"
| True region -> print_token region "True"
| Unit region -> print_token region "Unit"
| Tuple tuple -> print_tuple tuple
| List list -> print_list list
| EmptyList elist -> print_empty_list elist
| Set set -> print_set set
| EmptySet eset -> print_empty_set eset
| NoneExpr nexpr -> print_none_expr nexpr
| FunCall fun_call -> print_fun_call fun_call
| ConstrApp capp -> print_constr_app capp
| SomeApp sapp -> print_some_app sapp
| MapLookUp lookup -> print_map_lookup lookup
| ParExpr pexpr -> print_par_expr pexpr
and print_tuple {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq "," print_expr sequence;
print_token rpar ")"
and print_list {value=node; _} =
let lbra, sequence, rbra = node in
print_token lbra "[";
print_nsepseq "," print_expr sequence;
print_token rbra "]"
and print_empty_list {value=node; _} =
let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in
print_token lpar "(";
print_token lbracket "[";
print_token rbracket "]";
print_token colon ":";
print_type_expr type_expr;
print_token rpar ")"
and print_set {value=node; _} =
let lbrace, sequence, rbrace = node in
print_token lbrace "{";
print_nsepseq "," print_expr sequence;
print_token rbrace "}"
and print_empty_set {value=node; _} =
let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in
print_token lpar "(";
print_token lbrace "{";
print_token rbrace "}";
print_token colon ":";
print_type_expr type_expr;
print_token rpar ")"
and print_none_expr {value=node; _} =
let lpar, (c_None, colon, type_expr), rpar = node in
print_token lpar "(";
print_token c_None "None";
print_token colon ":";
print_type_expr type_expr;
print_token rpar ")"
and print_fun_call {value=node; _} =
let fun_name, arguments = node in
print_var fun_name;
print_tuple arguments
and print_constr_app {value=node; _} =
let constr, arguments = node in
print_constr constr;
print_tuple arguments
and print_some_app {value=node; _} =
let c_Some, arguments = node in
print_token c_Some "Some";
print_tuple arguments
and print_map_lookup {value=node; _} =
let {value = lbracket, expr, rbracket; _} = node#index in
print_var node#map_name;
print_token node#selector ".";
print_token lbracket "[";
print_expr expr;
print_token rbracket "]"
and print_par_expr {value=node; _} =
let lpar, expr, rpar = node in
print_token lpar "(";
print_expr expr;
print_token rpar ")"
and print_pattern {value=sequence; _} =
print_nsepseq "<:" print_core_pattern sequence
and print_core_pattern = function
PVar var -> print_var var
| PWild wild -> print_token wild "_"
| PInt i -> print_int i
| PBytes b -> print_bytes b
| PString s -> print_string s
| PUnit region -> print_token region "Unit"
| PFalse region -> print_token region "False"
| PTrue region -> print_token region "True"
| PNone region -> print_token region "None"
| PSome psome -> print_psome psome
| PList pattern -> print_list_pattern pattern
| PTuple ptuple -> print_ptuple ptuple
and print_psome {value=node; _} =
let c_Some, patterns = node in
print_token c_Some "Some";
print_patterns patterns
and print_patterns {value=node; _} =
let lpar, core_pattern, rpar = node in
print_token lpar "(";
print_core_pattern core_pattern;
print_token rpar ")"
and print_list_pattern = function
Sugar sugar -> print_sugar sugar
| Raw raw -> print_raw raw
and print_sugar {value=node; _} =
let lbracket, sequence, rbracket = node in
print_token lbracket "[";
print_sepseq "," print_core_pattern sequence;
print_token rbracket "]"
and print_raw {value=node; _} =
let lpar, (core_pattern, cons, pattern), rpar = node in
print_token lpar "(";
print_core_pattern core_pattern;
print_token cons "<:";
print_pattern pattern;
print_token rpar ")"
and print_ptuple {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq "," print_core_pattern sequence;
print_token rpar ")"

AST.mli Normal file

@@ -0,0 +1,384 @@
(* Abstract Syntax Tree (AST) for Ligo *)
open Utils
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
type 'a reg = 'a Region.reg
val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t
val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t
val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t
(* Keywords of Ligo *)
type kwd_begin = Region.t
type kwd_const = Region.t
type kwd_down = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_is = Region.t
type kwd_for = Region.t
type kwd_function = Region.t
type kwd_parameter = Region.t
type kwd_storage = Region.t
type kwd_type = Region.t
type kwd_of = Region.t
type kwd_operations = Region.t
type kwd_var = Region.t
type kwd_end = Region.t
type kwd_then = Region.t
type kwd_else = Region.t
type kwd_match = Region.t
type kwd_procedure = Region.t
type kwd_null = Region.t
type kwd_record = Region.t
type kwd_step = Region.t
type kwd_to = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_while = Region.t
type kwd_with = Region.t
(* Data constructors *)
type c_False = Region.t
type c_None = Region.t
type c_Some = Region.t
type c_True = Region.t
type c_Unit = Region.t
(* Symbols *)
type semi = Region.t
type comma = Region.t
type lpar = Region.t
type rpar = Region.t
type lbrace = Region.t
type rbrace = Region.t
type lbracket = Region.t
type rbracket = Region.t
type cons = Region.t
type vbar = Region.t
type arrow = Region.t
type asgnmnt = Region.t
type equal = Region.t
type colon = Region.t
type bool_or = Region.t
type bool_and = Region.t
type lt = Region.t
type leq = Region.t
type gt = Region.t
type geq = Region.t
type neq = Region.t
type plus = Region.t
type minus = Region.t
type slash = Region.t
type times = Region.t
type dot = Region.t
type wild = Region.t
type cat = Region.t
(* Virtual tokens *)
type eof = Region.t
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type map_name = string reg
type constr = string reg
(* Comma-separated non-empty lists *)
type 'a csv = ('a, comma) nsepseq
(* Bar-separated non-empty lists *)
type 'a bsv = ('a, vbar) nsepseq
(* Parentheses *)
type 'a par = (lpar * 'a * rpar) reg
(* Brackets compounds *)
type 'a brackets = (lbracket * 'a * rbracket) reg
(* Braced compounds *)
type 'a braces = (lbrace * 'a * rbrace) reg
(* The Abstract Syntax Tree *)
type t = <
types : type_decl list;
parameter : parameter_decl;
storage : storage_decl;
operations : operations_decl;
lambdas : lambda_decl list;
block : block reg;
eof : eof
>
and ast = t
and parameter_decl = (kwd_parameter * variable * colon * type_expr) reg
and storage_decl = (kwd_storage * type_expr) reg
and operations_decl = (kwd_operations * type_expr) reg
(* Type declarations *)
and type_decl = (kwd_type * type_name * kwd_is * type_expr) reg
and type_expr =
Prod of cartesian
| Sum of (variant, vbar) nsepseq reg
| Record of record_type
| TypeApp of (type_name * type_tuple) reg
| ParType of type_expr par
| TAlias of variable
and cartesian = (type_expr, times) nsepseq reg
and variant = (constr * kwd_of * cartesian) reg
and record_type = (kwd_record * field_decls * kwd_end) reg
and field_decls = (field_decl, semi) nsepseq
and field_decl = (variable * colon * type_expr) reg
and type_tuple = (type_name, comma) nsepseq par
(* Function and procedure declarations *)
and lambda_decl =
FunDecl of fun_decl reg
| ProcDecl of proc_decl reg
and fun_decl = <
kwd_function : kwd_function;
var : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
body : block reg;
kwd_with : kwd_with;
return : expr
>
and proc_decl = <
kwd_procedure : kwd_procedure;
var : variable;
param : parameters;
kwd_is : kwd_is;
body : block reg
>
and parameters = (param_decl, semi) nsepseq par
and param_decl = (var_kind * variable * colon * type_expr) reg
and var_kind =
Mutable of kwd_var
| Const of kwd_const
and block = <
decls : value_decls;
opening : kwd_begin;
instr : instructions;
close : kwd_end
>
and value_decls = (var_decl reg, semi) sepseq reg
and var_decl = <
kind : var_kind;
var : variable;
colon : colon;
vtype : type_expr;
setter : Region.t; (* "=" or ":=" *)
init : expr
>
and instructions = (instruction, semi) nsepseq reg
and instruction =
Single of single_instr
| Block of block reg
and single_instr =
Cond of conditional reg
| Match of match_instr reg
| Asgnmnt of asgnmnt_instr
| Loop of loop
| ProcCall of fun_call
| Null of kwd_null
and conditional = <
kwd_if : kwd_if;
test : expr;
kwd_then : kwd_then;
ifso : instruction;
kwd_else : kwd_else;
ifnot : instruction
>
and match_instr = <
kwd_match : kwd_match;
expr : expr;
kwd_with : kwd_with;
cases : cases;
kwd_end : kwd_end
>
and cases = (case, vbar) nsepseq reg
and case = (pattern * arrow * instruction) reg
and asgnmnt_instr = (variable * asgnmnt * expr) reg
and loop =
While of while_loop
| For of for_loop
and while_loop = (kwd_while * expr * block reg) reg
and for_loop =
ForInt of for_int reg
| ForCollect of for_collect reg
and for_int = <
kwd_for : kwd_for;
asgnmnt : asgnmnt_instr;
down : kwd_down option;
kwd_to : kwd_to;
bound : expr;
step : (kwd_step * expr) option;
block : block reg
>
and for_collect = <
kwd_for : kwd_for;
var : variable;
bind_to : (arrow * variable) option;
kwd_in : kwd_in;
expr : expr;
block : block reg
>
(* Expressions *)
and expr =
Or of (expr * bool_or * expr) reg
| And of (expr * bool_and * expr) reg
| Lt of (expr * lt * expr) reg
| Leq of (expr * leq * expr) reg
| Gt of (expr * gt * expr) reg
| Geq of (expr * geq * expr) reg
| Equal of (expr * equal * expr) reg
| Neq of (expr * neq * expr) reg
| Cat of (expr * cat * expr) reg
| Cons of (expr * cons * expr) reg
| Add of (expr * plus * expr) reg
| Sub of (expr * minus * expr) reg
| Mult of (expr * times * expr) reg
| Div of (expr * slash * expr) reg
| Mod of (expr * kwd_mod * expr) reg
| Neg of (minus * expr) reg
| Not of (kwd_not * expr) reg
| Int of (Lexer.lexeme * Z.t) reg
| Var of Lexer.lexeme reg
| String of Lexer.lexeme reg
| Bytes of (Lexer.lexeme * MBytes.t) reg
| False of c_False
| True of c_True
| Unit of c_Unit
| Tuple of tuple
| List of (expr, comma) nsepseq brackets
| EmptyList of empty_list
| Set of (expr, comma) nsepseq braces
| EmptySet of empty_set
| NoneExpr of none_expr
| FunCall of fun_call
| ConstrApp of constr_app
| SomeApp of (c_Some * arguments) reg
| MapLookUp of map_lookup reg
| ParExpr of expr par
and tuple = (expr, comma) nsepseq par
and empty_list =
(lbracket * rbracket * colon * type_expr) par
and empty_set =
(lbrace * rbrace * colon * type_expr) par
and none_expr =
(c_None * colon * type_expr) par
and fun_call = (fun_name * arguments) reg
and arguments = tuple
and constr_app = (constr * arguments) reg
and map_lookup = <
map_name : variable;
selector : dot;
index : expr brackets
>
(* Patterns *)
and pattern = (core_pattern, cons) nsepseq reg
and core_pattern =
PVar of Lexer.lexeme reg
| PWild of wild
| PInt of (Lexer.lexeme * Z.t) reg
| PBytes of (Lexer.lexeme * MBytes.t) reg
| PString of Lexer.lexeme reg
| PUnit of c_Unit
| PFalse of c_False
| PTrue of c_True
| PNone of c_None
| PSome of (c_Some * core_pattern par) reg
| PList of list_pattern
| PTuple of (core_pattern, comma) nsepseq par
and list_pattern =
Sugar of (core_pattern, comma) sepseq brackets
| Raw of (core_pattern * cons * pattern) par
(* Projecting regions *)
val type_expr_to_region : type_expr -> Region.t
val expr_to_region : expr -> Region.t
val var_kind_to_region : var_kind -> Region.t
val instr_to_region : instruction -> Region.t
val core_pattern_to_region : core_pattern -> Region.t
(* Printing *)
val print_tokens : t -> unit

Error.mli Normal file

@@ -0,0 +1,3 @@
type t = ..
type error = t

EvalOpt.ml Normal file

@@ -0,0 +1,143 @@
(* Parsing the command-line option for testing the Ligo lexer and
parser *)
let printf = Printf.printf
let sprintf = Printf.sprintf
let abort msg =
Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1
(* Help *)
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.li | \"-\"]\n" file;
print_endline "where <input>.li is the Ligo source file (default: stdin),";
print_endline "and each <option> (if any) is one of the following:";
print_endline " -c, --copy Print lexemes of tokens and markup (lexer)";
print_endline " -t, --tokens Print tokens (lexer)";
print_endline " -u, --units Print tokens and markup (lexer)";
print_endline " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations";
print_endline " -v, --verbose=<stage> cmdline, parser";
print_endline " -h, --help This help";
exit 0
(* Specifying the command-line options a la GNU *)
let copy = ref false
and tokens = ref false
and units = ref false
and quiet = ref false
and columns = ref false
and bytes = ref false
and verbose = ref Utils.String.Set.empty
and input = ref None
let split_at_colon = Str.(split (regexp ":"))
let add_verbose d =
verbose := List.fold_left (Utils.swap Utils.String.Set.add)
!verbose
(split_at_colon d)
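(* For example, "--verbose=cmdline:parser" is split at the colon
   into ["cmdline"; "parser"], and both stages are added to the set
   [verbose]. *)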
let specs =
let open! Getopt in [
'c', "copy", set copy true, None;
't', "tokens", set tokens true, None;
'u', "units", set units true, None;
'q', "quiet", set quiet true, None;
noshort, "columns", set columns true, None;
noshort, "bytes", set bytes true, None;
'v', "verbose", None, Some add_verbose;
'h', "help", Some help, None
]
;;
(* Handler of anonymous arguments *)
let anonymous arg =
match !input with
None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs")
;;
(* Parsing the command-line options *)
try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
;;
(* Checking options and exporting them as non-mutable values *)
type command = Quiet | Copy | Units | Tokens
let cmd =
match !quiet, !copy, !units, !tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
let string_of convert = function
None -> "None"
| Some s -> sprintf "Some %s" (convert s)
let quote s = sprintf "\"%s\"" s
let verbose_str =
let apply e a =
if a <> "" then sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () =
printf "COMMAND LINE\n";
printf "copy = %b\n" !copy;
printf "tokens = %b\n" !tokens;
printf "units = %b\n" !units;
printf "quiet = %b\n" !quiet;
printf "columns = %b\n" !columns;
printf "bytes = %b\n" !bytes;
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote !input)
;;
if Utils.String.Set.mem "cmdline" !verbose then print_opt ();;
let input =
match !input with
None | Some "-" -> !input
| Some file_path ->
if Filename.check_suffix file_path ".li"
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .ti."
(* Exporting remaining options as non-mutable values *)
let copy = !copy
and tokens = !tokens
and units = !units
and quiet = !quiet
and offsets = not !columns
and mode = if !bytes then `Byte else `Point
and verbose = !verbose
;;
if Utils.String.Set.mem "cmdline" verbose then
begin
printf "\nEXPORTED COMMAND LINE\n";
printf "copy = %b\n" copy;
printf "tokens = %b\n" tokens;
printf "units = %b\n" units;
printf "quiet = %b\n" quiet;
printf "offsets = %b\n" offsets;
printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote input)
end
;;

EvalOpt.mli Normal file

@@ -0,0 +1,42 @@
(* Parsing the command-line option for testing the Ligo lexer and
parser *)
(* If the value [offsets] is [true], then the user requested that
messages about source positions and regions be expressed in terms
of horizontal offsets. *)
val offsets : bool
(* If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is unicode points. *)
val mode : [`Byte | `Point]
(* If the option [verbose] is set to a list of predefined stages of
the compiler chain, then more information may be displayed about
those stages. *)
val verbose : Utils.String.Set.t
(* If the value [input] is [None] or [Some "-"], the input is standard
input. If [Some f], then the input is the file whose name (file
path) is [f]. *)
val input : string option
(* If the value [cmd] is
* [Quiet], then no output from the lexer and parser should be
expected, save for error messages: this is the default value;
* [Copy], then lexemes of tokens and markup will be printed to
standard output, with the expectation of a perfect match with
the input file;
* [Units], then the tokens and markup will be printed to standard
output, that is, the abstract representation of the concrete
lexical syntax;
* [Tokens], then the tokens only will be printed.
*)
type command = Quiet | Copy | Units | Tokens
val cmd : command
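(* Dispatch sketch (hypothetical caller of this interface):
     match cmd with
       Quiet  -> ()  (* only errors are reported *)
     | Copy   -> ()  (* echo lexemes and markup *)
     | Units  -> ()  (* print tokens and markup *)
     | Tokens -> ()  (* print tokens only *)
*)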

FQueue.ml Normal file

@@ -0,0 +1,19 @@
(* Purely functional queues based on a pair of lists *)
type 'a t = {rear: 'a list; front: 'a list}
let empty = {rear=[]; front=[]}
let enq x q = {q with rear = x::q.rear}
let rec deq = function
{rear=[]; front= []} -> None
| {rear; front= []} -> deq {rear=[]; front = List.rev rear}
| {rear; front=x::f} -> Some ({rear; front=f}, x)
let rec peek = function
{rear=[]; front= []} -> None
| {rear; front= []} -> peek {rear=[]; front = List.rev rear}
| {rear=_; front=x::_} as q -> Some (q,x)
let is_empty q = (q = empty)
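(* Usage sketch: items leave the queue in the order they entered it.
     let q = empty |> enq 1 |> enq 2 |> enq 3 in
     match deq q with
       Some (_, x) -> x = 1  (* true: 1 was enqueued first *)
     | None -> false
*)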

FQueue.mli Normal file

@@ -0,0 +1,17 @@
(* Purely functional queues *)
type 'a t
val empty : 'a t
val enq : 'a -> 'a t -> 'a t
val deq : 'a t -> ('a t * 'a) option
val is_empty : 'a t -> bool
(* The call [peek q] is [None] if the queue [q] is empty, and,
otherwise, is a pair made of a queue and the next item in it to be
dequeued. The returned queue contains the same items as [q], in the
same order, but is, in general, more efficient to use in further
calls. *)
val peek : 'a t -> ('a t * 'a) option

LexToken.mli Normal file

@@ -0,0 +1,149 @@
(* This signature defines the lexical tokens for Ligo
_Tokens_ are the abstract units which are used by the parser to
build the abstract syntax tree (AST), in other words, the stream of
tokens is the minimal model of the input program, carrying
implicitly all its structure in a linear encoding, and nothing
else, in particular, comments and whitespace are absent.
A _lexeme_ is a specific character string (concrete
representation) denoting a token (abstract representation). Tokens
can be thought of as sets, and lexemes as elements of those sets --
there is often an infinite number of lexemes, but a small number of
tokens. (Think of identifiers as lexemes and one token.)
The tokens are qualified here as being "lexical" because the
parser generator Menhir expects to define them, in which context
they are called "parsing tokens", and they are made to match each
other. (This is an idiosyncratic terminology.)
The type of the lexical tokens is the variant [t], also
aliased to [token].
*)
type lexeme = string
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * MBytes.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t (* ";" *)
| COMMA of Region.t (* "," *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| CONS of Region.t (* "<:" *)
| VBAR of Region.t (* "|" *)
| ARROW of Region.t (* "->" *)
| ASGNMNT of Region.t (* ":=" *)
| EQUAL of Region.t (* "=" *)
| COLON of Region.t (* ":" *)
| OR of Region.t (* "||" *)
| AND of Region.t (* "&&" *)
| LT of Region.t (* "<" *)
| LEQ of Region.t (* "<=" *)
| GT of Region.t (* ">" *)
| GEQ of Region.t (* ">=" *)
| NEQ of Region.t (* "=/=" *)
| PLUS of Region.t (* "+" *)
| MINUS of Region.t (* "-" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
| DOT of Region.t (* "." *)
| WILD of Region.t (* "_" *)
| CAT of Region.t (* "^" *)
(* Keywords *)
| Begin of Region.t
| Const of Region.t
| Down of Region.t
| If of Region.t
| In of Region.t
| Is of Region.t
| For of Region.t
| Function of Region.t
| Parameter of Region.t
| Storage of Region.t
| Type of Region.t
| Of of Region.t
| Operations of Region.t
| Var of Region.t
| End of Region.t
| Then of Region.t
| Else of Region.t
| Match of Region.t
| Null of Region.t
| Procedure of Region.t
| Record of Region.t
| Step of Region.t
| To of Region.t
| Mod of Region.t
| Not of Region.t
| While of Region.t
| With of Region.t
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
(* Projections
The difference between extracting the lexeme and a string from a
token is that the latter is the textual representation of the OCaml
value denoting the token (its abstract syntax), rather than its
lexeme (concrete syntax).
*)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
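(* For instance (a sketch), if [t] is the token for the identifier
   "foo" at some region [r], then [to_lexeme t] is "foo" (the
   concrete syntax), whereas [to_string t ~offsets:true `Point]
   renders [r] followed by the abstract syntax: Ident "foo". *)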
(* Injections *)
type int_err =
Non_canonical_zero
type ident_err = Reserved_name
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool

LexToken.mll Normal file

@@ -0,0 +1,616 @@
(* Lexer specification for Ligo, to be processed by [ocamllex] *)
{
(* START HEADER *)
(* Shorthands *)
type lexeme = string
let sprintf = Printf.sprintf
module SMap = Utils.String.Map
module SSet = Utils.String.Set
(* Hack to roll back one lexeme in the current semantic action *)
(*
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
*)
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * MBytes.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t
| COMMA of Region.t
| LPAR of Region.t
| RPAR of Region.t
| LBRACE of Region.t
| RBRACE of Region.t
| LBRACKET of Region.t
| RBRACKET of Region.t
| CONS of Region.t
| VBAR of Region.t
| ARROW of Region.t
| ASGNMNT of Region.t
| EQUAL of Region.t
| COLON of Region.t
| OR of Region.t
| AND of Region.t
| LT of Region.t
| LEQ of Region.t
| GT of Region.t
| GEQ of Region.t
| NEQ of Region.t
| PLUS of Region.t
| MINUS of Region.t
| SLASH of Region.t
| TIMES of Region.t
| DOT of Region.t
| WILD of Region.t
| CAT of Region.t
(* Keywords *)
| Begin of Region.t
| Const of Region.t
| Down of Region.t
| If of Region.t
| In of Region.t
| Is of Region.t
| For of Region.t
| Function of Region.t
| Parameter of Region.t
| Storage of Region.t
| Type of Region.t
| Of of Region.t
| Operations of Region.t
| Var of Region.t
| End of Region.t
| Then of Region.t
| Else of Region.t
| Match of Region.t
| Null of Region.t
| Procedure of Region.t
| Record of Region.t
| Step of Region.t
| To of Region.t
| Mod of Region.t
| Not of Region.t
| While of Region.t
| With of Region.t
(* Types *)
(*
| T_address of Region.t (* "address" *)
| T_big_map of Region.t (* "big_map" *)
| T_bool of Region.t (* "bool" *)
| T_bytes of Region.t (* "bytes" *)
| T_contract of Region.t (* "contract" *)
| T_int of Region.t (* "int" *)
| T_key of Region.t (* "key" *)
| T_key_hash of Region.t (* "key_hash" *)
| T_list of Region.t (* "list" *)
| T_map of Region.t (* "map" *)
| T_mutez of Region.t (* "mutez" *)
| T_nat of Region.t (* "nat" *)
| T_operation of Region.t (* "operation" *)
| T_option of Region.t (* "option" *)
| T_set of Region.t (* "set" *)
| T_signature of Region.t (* "signature" *)
| T_string of Region.t (* "string" *)
| T_timestamp of Region.t (* "timestamp" *)
| T_unit of Region.t (* "unit" *)
*)
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
let proj_token = function
(* Literals *)
String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (MBytes.to_hex b |> Hex.to_string)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
region, sprintf "Ident \"%s\"" value
| Constr Region.{region; value} ->
region, sprintf "Constr \"%s\"" value
(* Symbols *)
| SEMI region -> region, "SEMI"
| COMMA region -> region, "COMMA"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| CONS region -> region, "CONS"
| VBAR region -> region, "VBAR"
| ARROW region -> region, "ARROW"
| ASGNMNT region -> region, "ASGNMNT"
| EQUAL region -> region, "EQUAL"
| COLON region -> region, "COLON"
| OR region -> region, "OR"
| AND region -> region, "AND"
| LT region -> region, "LT"
| LEQ region -> region, "LEQ"
| GT region -> region, "GT"
| GEQ region -> region, "GEQ"
| NEQ region -> region, "NEQ"
| PLUS region -> region, "PLUS"
| MINUS region -> region, "MINUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| DOT region -> region, "DOT"
| WILD region -> region, "WILD"
| CAT region -> region, "CAT"
(* Keywords *)
| Begin region -> region, "Begin"
| Const region -> region, "Const"
| Down region -> region, "Down"
| If region -> region, "If"
| In region -> region, "In"
| Is region -> region, "Is"
| For region -> region, "For"
| Function region -> region, "Function"
| Parameter region -> region, "Parameter"
| Storage region -> region, "Storage"
| Type region -> region, "Type"
| Of region -> region, "Of"
| Operations region -> region, "Operations"
| Var region -> region, "Var"
| End region -> region, "End"
| Then region -> region, "Then"
| Else region -> region, "Else"
| Match region -> region, "Match"
| Null region -> region, "Null"
| Procedure region -> region, "Procedure"
| Record region -> region, "Record"
| Step region -> region, "Step"
| To region -> region, "To"
| Mod region -> region, "Mod"
| Not region -> region, "Not"
| While region -> region, "While"
| With region -> region, "With"
(* Data *)
| C_False region -> region, "C_False"
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
| C_True region -> region, "C_True"
| C_Unit region -> region, "C_Unit"
(* Virtual tokens *)
| EOF region -> region, "EOF"
let to_lexeme = function
(* Literals *)
String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Int i -> fst i.Region.value
| Ident id
| Constr id -> id.Region.value
(* Symbols *)
| SEMI _ -> ";"
| COMMA _ -> ","
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| CONS _ -> "<:"
| VBAR _ -> "|"
| ARROW _ -> "->"
| ASGNMNT _ -> ":="
| EQUAL _ -> "="
| COLON _ -> ":"
| OR _ -> "||"
| AND _ -> "&&"
| LT _ -> "<"
| LEQ _ -> "<="
| GT _ -> ">"
| GEQ _ -> ">="
| NEQ _ -> "=/="
| PLUS _ -> "+"
| MINUS _ -> "-"
| SLASH _ -> "/"
| TIMES _ -> "*"
| DOT _ -> "."
| WILD _ -> "_"
| CAT _ -> "^"
(* Keywords *)
| Begin _ -> "begin"
| Const _ -> "const"
| Down _ -> "down"
| If _ -> "if"
| In _ -> "in"
| Is _ -> "is"
| For _ -> "for"
| Function _ -> "function"
| Parameter _ -> "parameter"
| Storage _ -> "storage"
| Type _ -> "type"
| Of _ -> "of"
| Operations _ -> "operations"
| Var _ -> "var"
| End _ -> "end"
| Then _ -> "then"
| Else _ -> "else"
| Match _ -> "match"
| Null _ -> "null"
| Procedure _ -> "procedure"
| Record _ -> "record"
| Step _ -> "step"
| To _ -> "to"
| Mod _ -> "mod"
| Not _ -> "not"
| While _ -> "while"
| With _ -> "with"
(* Data constructors *)
| C_False _ -> "False"
| C_None _ -> "None"
| C_Some _ -> "Some"
| C_True _ -> "True"
| C_Unit _ -> "Unit"
(* Virtual tokens *)
| EOF _ -> ""
let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str
let to_region token = proj_token token |> fst
(* LEXIS *)
let keywords = [
(fun reg -> Begin reg);
(fun reg -> Const reg);
(fun reg -> Down reg);
(fun reg -> If reg);
(fun reg -> In reg);
(fun reg -> Is reg);
(fun reg -> For reg);
(fun reg -> Function reg);
(fun reg -> Parameter reg);
(fun reg -> Storage reg);
(fun reg -> Type reg);
(fun reg -> Of reg);
(fun reg -> Operations reg);
(fun reg -> Var reg);
(fun reg -> End reg);
(fun reg -> Then reg);
(fun reg -> Else reg);
(fun reg -> Match reg);
(fun reg -> Null reg);
(fun reg -> Procedure reg);
(fun reg -> Record reg);
(fun reg -> Step reg);
(fun reg -> To reg);
(fun reg -> Mod reg);
(fun reg -> Not reg);
(fun reg -> While reg);
(fun reg -> With reg)
]
let reserved =
let open SSet in
empty |> add "and"
|> add "as"
|> add "asr"
|> add "assert"
|> add "class"
|> add "constraint"
|> add "do"
|> add "done"
|> add "downto"
|> add "exception"
|> add "external"
|> add "false"
|> add "fun"
|> add "functor"
|> add "include"
|> add "inherit"
|> add "initializer"
|> add "land"
|> add "lazy"
|> add "let"
|> add "lor"
|> add "lsl"
|> add "lsr"
|> add "lxor"
|> add "method"
|> add "module"
|> add "mutable"
|> add "new"
|> add "nonrec"
|> add "object"
|> add "open"
|> add "or"
|> add "private"
|> add "rec"
|> add "sig"
|> add "struct"
|> add "true"
|> add "try"
|> add "val"
|> add "virtual"
|> add "when"
let constructors = [
(fun reg -> C_False reg);
(fun reg -> C_None reg);
(fun reg -> C_Some reg);
(fun reg -> C_True reg);
(fun reg -> C_Unit reg)
]
let add map (key, value) = SMap.add key value map
let mk_map mk_key list =
let apply map value = add map (mk_key value, value)
in List.fold_left apply SMap.empty list
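(* For instance, [mk_map (fun f -> to_lexeme (f Region.ghost)) keywords]
   maps the lexeme "begin" to [fun reg -> Begin reg], "const" to
   [fun reg -> Const reg], and so on: this is how [lexicon] below is
   built. *)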
type lexis = {
kwd : (Region.t -> token) SMap.t;
cstr : (Region.t -> token) SMap.t;
res : SSet.t
}
let lexicon : lexis =
let build list = mk_map (fun f -> to_lexeme (f Region.ghost)) list
in {kwd = build keywords;
cstr = build constructors;
res = reserved}
(* Identifiers *)
type ident_err = Reserved_name
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let digit = ['0'-'9']
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
(* Rules *)
rule scan_ident region lexicon = parse
(ident as value) eof {
if SSet.mem value lexicon.res
then Error Reserved_name
else Ok (match SMap.find_opt value lexicon.kwd with
Some mk_kwd -> mk_kwd region
| None -> Ident Region.{region; value}) }
and scan_constr region lexicon = parse
(constr as value) eof {
match SMap.find_opt value lexicon.cstr with
Some mk_cstr -> mk_cstr region
| None -> Constr Region.{region; value} }
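(* Examples (a sketch): scanning "begin" with [scan_ident] yields
   [Ok (Begin region)]; "let", which is reserved because of OCaml,
   yields [Error Reserved_name]; "foo" yields
   [Ok (Ident Region.{region; value="foo"})]. *)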
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Smart constructors (injections) *)
let mk_string lexeme region = String Region.{region; value=lexeme}
let mk_bytes lexeme region =
let norm = Str.(global_replace (regexp "_") "" lexeme) in
let value = lexeme, MBytes.of_hex (Hex.of_string norm)
in Bytes Region.{region; value}
type int_err = Non_canonical_zero
let mk_int lexeme region =
let z = Str.(global_replace (regexp "_") "" lexeme)
|> Z.of_string in
if Z.equal z Z.zero && lexeme <> "0"
then Error Non_canonical_zero
else Ok (Int Region.{region; value = lexeme, z})
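(* For example, "0" is accepted, whereas "00" and "0_0" also denote
   zero but with a lexeme other than "0", so they are rejected as
   non-canonical. *)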
let eof region = EOF region
let mk_sym lexeme region =
match lexeme with
";" -> SEMI region
| "," -> COMMA region
| "(" -> LPAR region
| ")" -> RPAR region
| "{" -> LBRACE region
| "}" -> RBRACE region
| "[" -> LBRACKET region
| "]" -> RBRACKET region
| "<:" -> CONS region
| "|" -> VBAR region
| "->" -> ARROW region
| ":=" -> ASGNMNT region
| "=" -> EQUAL region
| ":" -> COLON region
| "||" -> OR region
| "&&" -> AND region
| "<" -> LT region
| "<=" -> LEQ region
| ">" -> GT region
| ">=" -> GEQ region
| "=/=" -> NEQ region
| "+" -> PLUS region
| "-" -> MINUS region
| "/" -> SLASH region
| "*" -> TIMES region
| "." -> DOT region
| "_" -> WILD region
| "^" -> CAT region
| _ -> assert false
(* Identifiers *)
let mk_ident' lexeme region lexicon =
Lexing.from_string lexeme |> scan_ident region lexicon
let mk_ident lexeme region = mk_ident' lexeme region lexicon
(* Constructors *)
let mk_constr' lexeme region lexicon =
Lexing.from_string lexeme |> scan_constr region lexicon
let mk_constr lexeme region = mk_constr' lexeme region lexicon
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
| Begin _
| Const _
| Down _
| If _
| In _
| Is _
| For _
| Function _
| Parameter _
| Storage _
| Type _
| Of _
| Operations _
| Var _
| End _
| Then _
| Else _
| Match _
| Null _
| Procedure _
| Record _
| Step _
| To _
| Mod _
| Not _
| While _
| With _ -> true
| _ -> false
let is_constr = function
Constr _
| C_False _
| C_None _
| C_Some _
| C_True _
| C_Unit _ -> true
| _ -> false
let is_sym = function
SEMI _
| COMMA _
| LPAR _
| RPAR _
| LBRACE _
| RBRACE _
| LBRACKET _
| RBRACKET _
| CONS _
| VBAR _
| ARROW _
| ASGNMNT _
| EQUAL _
| COLON _
| OR _
| AND _
| LT _
| LEQ _
| GT _
| GEQ _
| NEQ _
| PLUS _
| MINUS _
| SLASH _
| TIMES _
| DOT _
| WILD _
| CAT _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false
(* END TRAILER *)
}

Lexer.mli Normal file

@@ -0,0 +1,153 @@
(* Lexer specification for Ligo, to be processed by [ocamllex].
The underlying design principles are:
(1) enforce stylistic constraints at a lexical level, in order to
early reject potentially misleading or poorly written
Ligo contracts;
(2) provide precise error messages with hints as to how to fix the
issue, which is achieved by consulting the lexical
right-context of lexemes;
(3) be as independent as possible from the Ligo version, so
upgrades have as little impact as possible on this
specification: this is achieved by using the most general
regular expressions to match the lexing buffer and broadly
distinguish the syntactic categories, and then delegating a
finer, protocol-dependent, second analysis to an external
module making the tokens (hence a functor below);
(4) support unit testing (lexing of the whole input with debug
traces);
The limitation to the protocol independence lies in the errors that
the external module building the tokens (which is
protocol-dependent) may have to report. Indeed these errors have to
be contextualised by the lexer in terms of input source regions, so
useful error messages can be printed, therefore they are part of
the signature [TOKEN] that parameterises the functor generated
here. For instance, if, in a future release of Ligo, new tokens
are added, and the recognition of their lexemes entails new
errors, the signature [TOKEN] will have to be augmented and the
lexer specification changed. However, it is more likely that
instructions or types are added, instead of new kinds of tokens.
*)
type lexeme = string
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. This makes it possible to externalise
version-dependent constraints in any module whose signature matches
[TOKEN]. Generic functions to construct tokens are required.
Note the predicate [is_eof], which characterises the virtual token
for end-of-file, because it requires special handling. Some of
those functions may yield errors, which are defined as values of
the type [int_err] etc. These errors can be better understood by
reading the ocamllex specification for the lexer ([Lexer.mll]).
*)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
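(* As an illustration (not part of this interface), a token module
   satisfying [TOKEN] could implement [mk_int] along the following
   lines. This is a sketch only, assuming a constructor [Int]
   carrying the lexeme and its value, as in [ParToken.mly]:

     let mk_int lexeme region =
       (* Underscores are stripped before conversion. *)
       let z = String.split_on_char '_' lexeme
               |> String.concat "" |> Z.of_string in
       if Z.equal z Z.zero && lexeme <> "0"
       then Error Non_canonical_zero
       else Ok (Int Region.{region; value = (lexeme, z)})
*)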
(* The module type for lexers is [S]. It mainly exports the function
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
* together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (a.k.a. tokeniser or
scanner) whose type is [Lexing.lexbuf -> token], suitable for a
parser generated by Menhir.
The argument labelled [log] is a logger. It may print a token and
its left markup to a given channel, at the caller's discretion.
*)
module type S =
sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error : ?offsets:bool -> [`Byte | `Point] ->
Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : S with module Token = Token
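(* A minimal usage sketch (illustrative): assuming a module
   [LexToken] matching [TOKEN], the following drains the tokens
   from the standard input:

     module MyLexer = Lexer.Make (LexToken)

     let () =
       let MyLexer.{read; buffer; close; _} =
         MyLexer.open_token_stream None in
       let rec drain () =
         if not (MyLexer.Token.is_eof (read buffer))
         then drain ()
       in drain (); close ()
*)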

803
Lexer.mll Normal file
View File

@ -0,0 +1,803 @@
(* Lexer specification for Ligo, to be processed by [ocamllex]. *)
{
(* START HEADER *)
type lexeme = string
(* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length
[len] containing the [len] characters in the list [p], in reverse
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
let mk_str (len: int) (p: char list) : string =
let bytes = Bytes.make len ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (len-1) p |> Bytes.to_string
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s)
(* LEXER ENGINE *)
(* Resetting file name and line number in the lexing buffer
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
*)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line line_num buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line_num}
let reset ~file ?(line=1) buffer =
(* Default value per the [Lexing] standard module convention *)
reset_file ~file buffer; reset_line line buffer
(* Rolling back one lexeme _within the current semantic action_ *)
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
(* ALIASES *)
let sprintf = Printf.sprintf
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. Consequently, generic functions to
construct tokens are provided. Note the predicate [is_eof], which
characterises the virtual token for end-of-file, because it requires
special handling. *)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
(* The module type for lexers is [S]. *)
module type S = sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error :
?offsets:bool -> [`Byte | `Point] -> Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : (S with module Token = Token) =
struct
module Token = Token
type token = Token.token
type file_path = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
(* When scanning structured constructs, like strings and comments,
we need to keep the region of the opening symbol (like double
quote, "#" or "/*") in order to report any error more
precisely. Since ocamllex is byte-oriented, we need to store
the parsed bytes as characters in an accumulator [acc], along
with its length [len], so that, when we are done, it is easy to
build the string making up the structured construct with
[mk_str] (see above).
The resulting data structure is called a _thread_.
*)
type thread = {
opening : Region.t;
len : int;
acc : char list
}
let push_char char {opening; len; acc} = {opening; len=len+1; acc=char::acc}
let push_string str {opening; len; acc} =
{opening;
len = len + String.length str;
acc = explode str acc}
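(* For instance (illustrative): starting from a thread with [len=1]
   and [acc=['"']], as when scanning a string, the call
   [push_string "ab" thread] yields a thread with [len=3] and
   [acc=['b';'a';'"']], from which [mk_str] above rebuilds the
   lexeme "\"ab". *)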
(* STATE *)
(* Beyond tokens, the result of lexing is a state (a so-called
_state monad_). The type [state] represents the logical state
of the lexing engine, that is, a value which is threaded during
scanning and which denotes useful, high-level information
beyond what the type [Lexing.lexbuf] in the standard library
already provides for all generic lexers.
Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised
tokens, with the markup at the left of its lexeme until either
the start of the file or the end of the previously recognised
token.
The markup from the last recognised token or, if the first
token has not been recognised yet, from the beginning of the
file, is stored in the field [markup] of the state, because it
is a side-effect with respect to the output token list; the
recognised units themselves are kept in the queue [units], so
the record [state] may easily be extended during the future
maintenance of this lexer.
The state also includes a field [pos] which holds the current
position in the Ligo source file. The position is not always
updated after a single character has been matched: that depends
on the regular expression that matched the lexing buffer.
The fields [decoder] and [supply] offer the support needed
for the lexing of UTF-8 encoded characters in comments (the
only place where they are allowed in Ligo). The former is the
decoder proper and the latter is the effectful function
[supply] that takes a byte buffer, a start index and a length,
and feeds them to [decoder]. See the documentation of the third-party
library Uutf.
*)
type state = {
units : (Markup.t list * token) FQueue.t;
markup : Markup.t list;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit
}
(* The call [enqueue (token, state)] updates functionally the
state [state] by associating the token [token] with the stored
markup and enqueuing the pair into the units queue. The field
[markup] is then reset to the empty list. *)
let enqueue (token, state) = {
state with
units = FQueue.enq (state.markup, token) state.units;
markup = []
}
(* The call [sync state buffer] updates the current position in
accordance with the contents of the lexing buffer, more
precisely, depending on the length of the string which has just
been recognised by the scanner: that length is used as a
positive offset to the current column. *)
let sync state buffer =
let lex = Lexing.lexeme buffer in
let len = String.length lex in
let start = state.pos in
let stop = start#shift_bytes len in
let state = {state with pos = stop}
in Region.make ~start ~stop, lex, state
(* MARKUP *)
(* Committing markup to the current logical state *)
let push_newline state buffer =
let value = Lexing.lexeme buffer
and () = Lexing.new_line buffer
and start = state.pos in
let stop = start#new_line value in
let state = {state with pos = stop}
and region = Region.make ~start ~stop in
let unit = Markup.Newline Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_line (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.LineCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_block (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.BlockCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_space state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Space Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_tabs state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Tabs Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_bom state buffer =
let region, value, state = sync state buffer in
let unit = Markup.BOM Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
(* ERRORS *)
type Error.t += Invalid_utf8_sequence
type Error.t += Unexpected_character of char
type Error.t += Undefined_escape_sequence
type Error.t += Missing_break
type Error.t += Unterminated_string
type Error.t += Unterminated_integer
type Error.t += Odd_lengthed_bytes
type Error.t += Unterminated_comment
type Error.t += Orphan_minus
type Error.t += Non_canonical_zero
type Error.t += Negative_byte_sequence
type Error.t += Broken_string
type Error.t += Invalid_character_in_string
type Error.t += Reserved_name
let error_to_string = function
Invalid_utf8_sequence ->
"Invalid UTF-8 sequence.\n"
| Unexpected_character c ->
sprintf "Unexpected character '%c'.\n" c
| Undefined_escape_sequence ->
"Undefined escape sequence.\n\
Hint: Remove or replace the sequence.\n"
| Missing_break ->
"Missing break.\n\
Hint: Insert some space.\n"
| Unterminated_string ->
"Unterminated string.\n\
Hint: Close with double quotes.\n"
| Unterminated_integer ->
"Unterminated integer.\n\
Hint: Remove the sign or proceed with a natural number.\n"
| Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit.\n"
| Unterminated_comment ->
"Unterminated comment.\n\
Hint: Close with \"*/\".\n"
| Orphan_minus ->
"Orphan minus sign.\n\
Hint: Remove the trailing space.\n"
| Non_canonical_zero ->
"Non-canonical zero.\n\
Hint: Use 0.\n"
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign.\n"
| Broken_string ->
"The string starting here is interrupted by a line break.\n\
Hint: Remove the break or close the string before.\n"
| Invalid_character_in_string ->
"Invalid character in string.\n\
Hint: Remove or replace the character.\n"
| Reserved_name ->
"Reserved named.\n\
Hint: Change the name.\n"
| _ -> assert false
exception Error of Error.t Region.reg
let fail region value = raise (Error Region.{region; value})
(* TOKENS *)
(* Making tokens *)
let mk_string (thread, state) =
let start = thread.opening#start in
let stop = state.pos in
let region = Region.make ~start ~stop in
let lexeme = mk_str thread.len thread.acc in
let token = Token.mk_string lexeme region
in token, state
let mk_bytes bytes state buffer =
let region, _, state = sync state buffer in
let token = Token.mk_bytes bytes region
in token, state
let mk_int state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_int lexeme region with
Ok token -> token, state
| Error Token.Non_canonical_zero ->
fail region Non_canonical_zero
let mk_ident state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_ident lexeme region with
Ok token -> token, state
| Error Token.Reserved_name -> fail region Reserved_name
let mk_constr state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_constr lexeme region, state
let mk_sym state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_sym lexeme region, state
let mk_eof state buffer =
let region, _, state = sync state buffer
in Token.eof region, state
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
let nl = ['\n' '\r'] | "\r\n"
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
let hexa_digit = digit | ['A'-'F']
let byte = hexa_digit hexa_digit
let byte_seq = byte | byte (byte | '_')* byte
let bytes = "0x" (byte_seq? as seq)
let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
| "\\r" | "\\t" | "\\x" byte
let symbol = ';' | ','
| '(' | ')' | '{' | '}' | '[' | ']'
| "<:" | '|' | "->" | ":=" | '=' | ':'
| "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/="
| '+' | '-' | '*' | '.' | '_' | '^'
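(* For example (illustrative): [natural] matches "0", "7" and
   "1_000", but neither "_1" nor "1_"; [bytes] matches "0xFF_00";
   [nl] matches "\n", "\r" and "\r\n". *)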
(* RULES *)
(* Except for the first rule [init], all rules bear a name starting
with "scan".
All have a parameter [state] that they thread through their
recursive calls. The rules for the structured constructs (strings
and comments) have an extra parameter of type [thread] (see above).
*)
rule init state = parse
utf8_bom { scan (push_bom state lexbuf) lexbuf }
| _ { rollback lexbuf; scan state lexbuf }
and scan state = parse
nl { scan (push_newline state lexbuf) lexbuf }
| ' '+ { scan (push_space state lexbuf) lexbuf }
| '\t'+ { scan (push_tabs state lexbuf) lexbuf }
| ident { mk_ident state lexbuf |> enqueue }
| constr { mk_constr state lexbuf |> enqueue }
| bytes { (mk_bytes seq) state lexbuf |> enqueue }
| integer { mk_int state lexbuf |> enqueue }
| symbol { mk_sym state lexbuf |> enqueue }
| eof { mk_eof state lexbuf |> enqueue }
| '"' { let opening, _, state = sync state lexbuf in
let thread = {opening; len=1; acc=['"']} in
scan_string thread state lexbuf |> mk_string |> enqueue }
| "/*" { let opening, _, state = sync state lexbuf in
let thread = {opening; len=2; acc=['*';'/']} in
let state = scan_block thread state lexbuf |> push_block
in scan state lexbuf }
| '#' { let opening, _, state = sync state lexbuf in
let thread = {opening; len=1; acc=['#']} in
let state = scan_line thread state lexbuf |> push_line
in scan state lexbuf }
(* Some special errors
Some special errors are recognised in the semantic actions of the
following regular expressions. The first error is a minus sign
separated from the integer it modifies by some markup (space or
tabs). The second is a minus sign immediately followed by
anything other than a natural number (matched above) or markup and
a number (previous error). The third is the strange occurrence of
an attempt at defining a negative byte sequence. Finally, the
catch-all rule reports unexpected characters in the buffer (and
is not so special, after all).
*)
| '-' { let region, _, state = sync state lexbuf in
let state = scan state lexbuf in
let open Markup in
match FQueue.peek state.units with
None -> assert false
| Some (_, ((Space _ | Tabs _)::_, token))
when Token.is_int token ->
fail region Orphan_minus
| _ -> fail region Unterminated_integer }
| '-' "0x" byte_seq?
{ let region, _, _ = sync state lexbuf
in fail region Negative_byte_sequence }
| _ as c { let region, _, _ = sync state lexbuf
in fail region (Unexpected_character c) }
(* Finishing a string *)
and scan_string thread state = parse
nl { fail thread.opening Broken_string }
| eof { fail thread.opening Unterminated_string }
| ['\t' '\r' '\b']
{ let region, _, _ = sync state lexbuf
in fail region Invalid_character_in_string }
| '"' { let _, _, state = sync state lexbuf
in push_char '"' thread, state }
| esc { let _, lexeme, state = sync state lexbuf
in scan_string (push_string lexeme thread) state lexbuf }
| '\\' _ { let region, _, _ = sync state lexbuf
in fail region Undefined_escape_sequence }
| _ as c { let _, _, state = sync state lexbuf in
scan_string (push_char c thread) state lexbuf }
(* Finishing a block comment
The lexing of block comments must take care of embedded block
comments that may occur within, as well as strings, so no substring
"*/" may inadvertantly close the block. This is the purpose of the
first case of the scanner [scan_block].
*)
and scan_block thread state = parse
'"' | "/*" { let opening = thread.opening in
let opening', lexeme, state = sync state lexbuf in
let thread = push_string lexeme thread in
let thread = {thread with opening=opening'} in
let next = if lexeme = "\"" then scan_string
else scan_block in
let thread, state = next thread state lexbuf in
let thread = {thread with opening}
in scan_block thread state lexbuf }
| "*/" { let _, lexeme, state = sync state lexbuf
in push_string lexeme thread, state }
| nl as nl { let () = Lexing.new_line lexbuf
and state = {state with pos = state.pos#new_line nl}
and thread = push_string nl thread
in scan_block thread state lexbuf }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_block thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
(* Finishing a line comment *)
and scan_line thread state = parse
nl as nl { let () = Lexing.new_line lexbuf
and thread = push_string nl thread
and state = {state with pos = state.pos#new_line nl}
in thread, state }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_line thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
and scan_utf8 thread state = parse
eof { fail thread.opening Unterminated_comment }
| _ as c { let thread = push_char c thread in
let lexeme = Lexing.lexeme lexbuf in
let () = state.supply (Bytes.of_string lexeme) 0 1 in
match Uutf.decode state.decoder with
`Uchar _ -> thread, None
| `Malformed _ -> thread, Some Invalid_utf8_sequence
| `Await -> scan_utf8 thread state lexbuf
| `End -> assert false }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
Because we want the lexer to have access to the right lexical
context of a recognised lexeme (to enforce stylistic constraints or
report special error patterns), we need to keep a hidden reference
to a queue of recognised lexical units (that is, tokens and markup)
that acts as a mutable state between the calls to
[read_token]. When [read_token] is called, that queue is consulted
first and, if it contains at least one token, that token is
returned; otherwise, the lexing buffer is scanned for at least one
more new token. That is the general principle: we put a high-level
buffer (our queue) on top of the low-level lexing buffer.
One tricky and important detail is that we must make any parser
generated by Menhir (and calling [read_token]) believe that the
last region of the input source that was matched indeed corresponds
to the returned token, despite the fact that many tokens and
markup may have been matched since it was actually read from the
input. In other words, the parser requests a token that is taken
from the high-level buffer, but obtains the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
Consequently, in order to maintain a consistent view for the
parser, we have to patch some fields of the lexing buffer, namely
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
generated by Menhir when querying source positions (regions). This
is the purpose of the function [patch_buffer]. After reading one
or more tokens and markup by the scanning rule [scan], we have to
save in the hidden reference [buf_reg] the region of the source
that was matched by [scan]. This atomic sequence of patching,
scanning and saving is implemented by the _function_ [scan]
(beware: it shadows the scanning rule [scan]). The function
[patch_buffer] is, of course, also called just before returning the
token, so the parser has a view of the lexing buffer consistent
with the token.
Note that an additional reference [first_call] is needed to
distinguish the first call to the function [scan], as the first
scanning rule is actually [init] (which can handle the BOM), not
[scan].
*)
type logger = Markup.t list -> token -> unit
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
let file_path = match EvalOpt.input with
None | Some "-" -> ""
| Some file_path -> file_path
let pos = Pos.min#set_file file_path
let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual
let supply = Uutf.Manual.src decoder
let state = ref {units = FQueue.empty;
last = Region.ghost;
pos;
markup = [];
decoder;
supply}
let get_pos () = !state.pos
let get_last () = !state.last
let patch_buffer (start, stop) buffer =
let open Lexing in
let file_path = buffer.lex_curr_p.pos_fname in
buffer.lex_start_p <- {start with pos_fname = file_path};
buffer.lex_curr_p <- {stop with pos_fname = file_path}
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p)
let scan buffer =
patch_buffer !buf_reg buffer;
(if !first_call
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
save_region buffer
let next_token buffer =
scan buffer;
match FQueue.peek !state.units with
None -> assert false
| Some (units, ext_token) ->
state := {!state with units}; Some ext_token
let check_right_context token buffer =
let open Token in
if is_int token || is_bytes token then
match next_token buffer with
Some ([], next) ->
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
if is_bytes token && is_int next then
fail region Odd_lengthed_bytes
else
if is_ident next || is_string next
|| is_bytes next || is_int next then
fail region Missing_break
| _ -> ()
else
if Token.is_ident token || Token.is_string token then
match next_token buffer with
Some ([], next) ->
if Token.is_ident next || Token.is_string next
|| Token.is_bytes next || Token.is_int next
then
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos
in fail region Missing_break
| _ -> ()
let rec read_token ?(log=fun _ _ -> ()) buffer =
match FQueue.deq !state.units with
None ->
scan buffer;
read_token ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := {!state with units; last = Token.to_region token};
check_right_context token buffer;
patch_buffer (Token.to_region token)#byte_pos buffer;
token
let open_token_stream file_path_opt =
let cin = match file_path_opt with
None | Some "-" -> stdin
| Some file_path -> open_in file_path in
let buffer = Lexing.from_channel cin in
let () = match file_path_opt with
None | Some "-" -> ()
| Some file_path -> reset ~file:file_path buffer
and close () = close_in cin in
{read = read_token; buffer; get_pos; get_last; close}
(* Standalone lexer for debugging purposes *)
(* Pretty-printing in a string the lexemes making up the markup
between two tokens, concatenated with the last lexeme itself. *)
let output_token ?(offsets=true) mode command
channel left_mark token : unit =
let output str = Printf.fprintf channel "%s%!" str in
let output_nl str = output (str ^ "\n") in
match command with
EvalOpt.Quiet -> ()
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl
| EvalOpt.Copy ->
let lexeme = Token.to_lexeme token
and apply acc markup = Markup.to_lexeme markup :: acc
in List.fold_left apply [lexeme] left_mark
|> String.concat "" |> output
| EvalOpt.Units ->
let abs_token = Token.to_string token ~offsets mode
and apply acc markup =
Markup.to_string markup ~offsets mode :: acc
in List.fold_left apply [abs_token] left_mark
|> String.concat "\n" |> output_nl
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg)
let trace ?(offsets=true) mode file_path_opt command : unit =
try
let {read; buffer; close; _} = open_token_stream file_path_opt
and cout = stdout in
let log = output_token ~offsets mode command cout
and close_all () = close (); close_out cout in
let rec iter () =
match read ~log buffer with
token ->
if Token.is_eof token then close_all ()
else iter ()
| exception Error e -> print_error ~offsets mode e; close_all ()
in iter ()
with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg)
end (* of functor [Make] in HEADER *)
(* END TRAILER *)
}

17
LexerMain.ml Normal file
View File

@ -0,0 +1,17 @@
(* Driver for the lexer of Ligo *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Running the lexer on the input file *)
module Lexer = Lexer.Make (LexToken)
let () = Lexer.trace ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.input EvalOpt.cmd

6
MBytes.ml Normal file
View File

@ -0,0 +1,6 @@
(* TEMPORARY: SHOULD BE ERASED *)
type t = Hex.t
let of_hex x = x
let to_hex x = x

6
MBytes.mli Normal file
View File

@ -0,0 +1,6 @@
(* TEMPORARY: SHOULD BE ERASED *)
type t
val of_hex : Hex.t -> t
val to_hex : t -> Hex.t

42
Markup.ml Normal file
View File

@ -0,0 +1,42 @@
type lexeme = string
type t =
Tabs of int Region.reg
| Space of int Region.reg
| Newline of lexeme Region.reg
| LineCom of lexeme Region.reg
| BlockCom of lexeme Region.reg
| BOM of lexeme Region.reg
type markup = t
(* Pretty-printing *)
let sprintf = Printf.sprintf
let to_lexeme = function
Tabs Region.{value;_} -> String.make value '\t'
| Space Region.{value;_} -> String.make value ' '
| Newline Region.{value;_}
| LineCom Region.{value;_}
| BlockCom Region.{value;_}
| BOM Region.{value;_} -> value
let to_string markup ?(offsets=true) mode =
let region, val_str =
match markup with
Tabs Region.{value; region} ->
let lex = String.make value '\t' |> String.escaped
in region, sprintf "Tabs \"%s\"" lex
| Space Region.{value; region} ->
region, sprintf "Space \"%s\"" (String.make value ' ')
| Newline Region.{value; region} ->
region, sprintf "Newline \"%s\"" (String.escaped value)
| LineCom Region.{value; region} ->
region, sprintf "LineCom \"%s\"" (String.escaped value)
| BlockCom Region.{value; region} ->
region, sprintf "BlockCom \"%s\"" (String.escaped value)
| BOM Region.{value; region} ->
region, sprintf "BOM \"%s\"" (String.escaped value) in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str

32
Markup.mli Normal file
View File

@ -0,0 +1,32 @@
(* This module defines the sorts of markup recognised by the Ligo
lexer *)
(* A lexeme is a piece of concrete syntax belonging to a token. In
algebraic terms, a token is also a piece of abstract lexical
syntax. Lexical units encompass both markup and lexemes. *)
type lexeme = string
type t =
Tabs of int Region.reg (* Tabulations *)
| Space of int Region.reg (* Space *)
| Newline of lexeme Region.reg (* "\n" or "\r\n" line breaks *)
| LineCom of lexeme Region.reg (* Line comments *)
| BlockCom of lexeme Region.reg (* Block comments *)
| BOM of lexeme Region.reg (* Byte-Order Mark for UTF-8 (optional) *)
type markup = t
(* Pretty-printing of markup
The difference between [to_lexeme] and [to_string] is that the
former builds the corresponding concrete syntax (the lexeme),
whilst the latter makes up a textual representation of the abstract
syntax (the OCaml data constructors).
The result of [to_string] is escaped to avoid capture by the
terminal.
*)
val to_lexeme : t -> lexeme
val to_string : t -> ?offsets:bool -> [`Byte | `Point] -> string
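(* For example (illustrative): for three spaces at the very
   beginning of a file "a.li", [to_lexeme] returns "   ", whereas
   [to_string] with the default settings returns something like
   "a.li:1:0-1:3: Space \"   \"". *)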

87
ParToken.mly Normal file
View File

@ -0,0 +1,87 @@
%{
%}
(* Tokens (mirroring those defined in module LexToken) *)
(* Literals *)
%token <LexToken.lexeme Region.reg> String
%token <(LexToken.lexeme * MBytes.t) Region.reg> Bytes
%token <(LexToken.lexeme * Z.t) Region.reg> Int
%token <LexToken.lexeme Region.reg> Ident
%token <LexToken.lexeme Region.reg> Constr
(* Symbols *)
%token <Region.t> SEMI
%token <Region.t> COMMA
%token <Region.t> LPAR
%token <Region.t> RPAR
%token <Region.t> LBRACE
%token <Region.t> RBRACE
%token <Region.t> LBRACKET
%token <Region.t> RBRACKET
%token <Region.t> CONS
%token <Region.t> VBAR
%token <Region.t> ARROW
%token <Region.t> ASGNMNT
%token <Region.t> EQUAL
%token <Region.t> COLON
%token <Region.t> OR
%token <Region.t> AND
%token <Region.t> LT
%token <Region.t> LEQ
%token <Region.t> GT
%token <Region.t> GEQ
%token <Region.t> NEQ
%token <Region.t> PLUS
%token <Region.t> MINUS
%token <Region.t> SLASH
%token <Region.t> TIMES
%token <Region.t> DOT
%token <Region.t> WILD
%token <Region.t> CAT
(* Keywords *)
%token <Region.t> Begin
%token <Region.t> Const
%token <Region.t> Down
%token <Region.t> If
%token <Region.t> In
%token <Region.t> Is
%token <Region.t> For
%token <Region.t> Function
%token <Region.t> Parameter
%token <Region.t> Storage
%token <Region.t> Type
%token <Region.t> Of
%token <Region.t> Operations
%token <Region.t> Var
%token <Region.t> End
%token <Region.t> Then
%token <Region.t> Else
%token <Region.t> Match
%token <Region.t> Null
%token <Region.t> Procedure
%token <Region.t> Record
%token <Region.t> Step
%token <Region.t> To
%token <Region.t> Mod
%token <Region.t> Not
%token <Region.t> While
%token <Region.t> With
(* Data constructors *)
%token <Region.t> C_False
%token <Region.t> C_None
%token <Region.t> C_Some
%token <Region.t> C_True
%token <Region.t> C_Unit
(* Virtual tokens *)
%token <Region.t> EOF
%%

627
Parser.mly Normal file
View File

@ -0,0 +1,627 @@
%{
(* START HEADER *)
open Region
open AST
(* END HEADER *)
%}
(* Entry points *)
%start program
%type <AST.t> program
%%
(* RULES *)
(* Compound constructs *)
par(X):
LPAR X RPAR {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
braces(X):
LBRACE X RBRACE {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
brackets(X):
LBRACKET X RBRACKET {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
(* Sequences
Series of instances of the same syntactical category often have
to be parsed, like lists of expressions, patterns etc. The simplest of
all is the possibly empty sequence (series), parsed below by
[seq]. The non-empty sequence is parsed by [nseq]. Note that the
latter returns a pair made of the first parsed item (the parameter
[X]) and the rest of the sequence (possibly empty). This way, the
OCaml typechecker can keep track of this information along the
static control-flow graph. The rule [sepseq] parses possibly empty
sequences of items separated by some token (e.g., a comma), and
rule [nsepseq] is for non-empty such sequences. See module [Utils]
for the types corresponding to the semantic actions of those rules.
*)
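(* For instance (illustrative): parsing "x, y, z" with
   [nsepseq(expr,COMMA)] yields a value of the shape
   [x, [(r1,y); (r2,z)]], of type [(expr, comma) Utils.nsepseq],
   where [r1] and [r2] are the regions of the two commas. *)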
(* Possibly empty sequence of items *)
seq(X):
(**) { [] }
| X seq(X) { $1::$2 }
(* Non-empty sequence of items *)
nseq(X):
X seq(X) { $1,$2 }
(* Non-empty separated sequence of items *)
nsepseq(X,Sep):
X { $1, [] }
| X Sep nsepseq(X,Sep) { let h,t = $3 in $1, ($2,h)::t }
(* Possibly empty separated sequence of items *)
sepseq(X,Sep):
(**) { None }
| nsepseq(X,Sep) { Some $1 }
(* Inlines *)
%inline var : Ident { $1 }
%inline type_name : Ident { $1 }
%inline fun_name : Ident { $1 }
%inline field_name : Ident { $1 }
%inline map_name : Ident { $1 }
(* Main *)
program:
seq(type_decl)
parameter_decl
storage_decl
operations_decl
seq(lambda_decl)
block
EOF {
object
method types = $1
method parameter = $2
method storage = $3
method operations = $4
method lambdas = $5
method block = $6
method eof = $7
end
}
parameter_decl:
Parameter var COLON type_expr {
let stop = type_expr_to_region $4
in {region = cover $1 stop;
value = $1,$2,$3,$4}
}
storage_decl:
Storage type_expr {
let stop = type_expr_to_region $2
in {region = cover $1 stop;
value = $1,$2}
}
operations_decl:
Operations type_expr {
let stop = type_expr_to_region $2
in {region = cover $1 stop;
value = $1,$2}
}
(* Type declarations *)
type_decl:
Type type_name Is type_expr {
{region = cover $1 (type_expr_to_region $4);
value = $1,$2,$3,$4}
}
type_expr:
cartesian { Prod $1 }
| sum_type { Sum $1 }
| record_type { Record $1 }
cartesian:
nsepseq(core_type,TIMES) {
let region = nsepseq_to_region type_expr_to_region $1
in {region; value=$1}
}
core_type:
type_name {
TAlias $1
}
| type_name type_tuple {
let region = cover $1.region $2.region
in TypeApp {region; value = $1,$2}
}
| par(type_expr) {
ParType $1
}
type_tuple:
par(nsepseq(type_name,COMMA)) { $1 }
sum_type:
nsepseq(variant,VBAR) {
let region = nsepseq_to_region (fun x -> x.region) $1
in {region; value=$1}
}
variant:
Constr Of cartesian {
let region = cover $1.region $3.region
in {region; value = $1,$2,$3}
}
record_type:
Record
nsepseq(field_decl,SEMI)
End {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
field_decl:
field_name COLON type_expr {
let stop = type_expr_to_region $3 in
let region = cover $1.region stop
in {region; value = $1,$2,$3}
}
(* Function and procedure declarations *)
lambda_decl:
fun_decl { FunDecl $1 }
| proc_decl { ProcDecl $1 }
fun_decl:
Function fun_name parameters COLON type_expr Is
block
With expr {
let region = cover $1 (expr_to_region $9) in
let value =
object
method kwd_function = $1
method var = $2
method param = $3
method colon = $4
method ret_type = $5
method kwd_is = $6
method body = $7
method kwd_with = $8
method return = $9
end
in {region; value}
}
proc_decl:
Procedure fun_name parameters Is
block {
let region = cover $1 $5.region in
let value =
object
method kwd_procedure = $1
method var = $2
method param = $3
method kwd_is = $4
method body = $5
end
in {region; value}
}
parameters:
par(nsepseq(param_decl,SEMI)) { $1 }
param_decl:
var_kind var COLON type_expr {
let start = var_kind_to_region $1 in
let stop = type_expr_to_region $4 in
let region = cover start stop
in {region; value = $1,$2,$3,$4}
}
var_kind:
Var { Mutable $1 }
| Const { Const $1 }
block:
value_decls
Begin
instructions
End {
let region = cover $1.region $4 in
let value =
object
method decls = $1
method opening = $2
method instr = $3
method close = $4
end
in {region; value}
}
value_decls:
sepseq(var_decl,SEMI) {
let region = sepseq_to_region (fun x -> x.region) $1
in {region; value=$1}
}
var_decl:
Var var COLON type_expr ASGNMNT expr {
let region = cover $1 (expr_to_region $6) in
let value =
object
method kind = Mutable $1
method var = $2
method colon = $3
method vtype = $4
method setter = $5
method init = $6
end
in {region; value}
}
| Const var COLON type_expr EQUAL expr {
let region = cover $1 (expr_to_region $6) in
let value =
object
method kind = Const $1
method var = $2
method colon = $3
method vtype = $4
method setter = $5
method init = $6
end
in {region; value}
}
instructions:
nsepseq(instruction,SEMI) {
let region = nsepseq_to_region instr_to_region $1
in {region; value=$1}
}
instruction:
single_instr { Single $1 }
| block { Block $1 }
single_instr:
conditional { Cond $1 }
| match_instr { Match $1 }
| asgnmnt { Asgnmnt $1 }
| loop { Loop $1 }
| proc_call { ProcCall $1 }
| Null { Null $1 }
proc_call:
fun_call { $1 }
conditional:
If expr Then instruction Else instruction {
let region = cover $1 (instr_to_region $6) in
let value =
object
method kwd_if = $1
method test = $2
method kwd_then = $3
method ifso = $4
method kwd_else = $5
method ifnot = $6
end
in {region; value}
}
match_instr:
Match expr With cases End {
let region = cover $1 $5 in
let value =
object
method kwd_match = $1
method expr = $2
method kwd_with = $3
method cases = $4
method kwd_end = $5
end
in {region; value}
}
cases:
nsepseq(case,VBAR) {
let region = nsepseq_to_region (fun x -> x.region) $1
in {region; value=$1}
}
case:
pattern ARROW instruction {
let region = cover $1.region (instr_to_region $3)
in {region; value = $1,$2,$3}
}
asgnmnt:
var ASGNMNT expr {
let region = cover $1.region (expr_to_region $3)
in {region; value = $1,$2,$3}
}
loop:
while_loop { $1 }
| for_loop { $1 }
while_loop:
While expr block {
let region = cover $1 $3.region
in While {region; value=$1,$2,$3}
}
for_loop:
For asgnmnt Down? To expr option(step_clause) block {
let region = cover $1 $7.region in
let value =
object
method kwd_for = $1
method asgnmnt = $2
method down = $3
method kwd_to = $4
method bound = $5
method step = $6
method block = $7
end
in For (ForInt {region; value})
}
| For var option(arrow_clause) In expr block {
let region = cover $1 $6.region in
let value =
object
method kwd_for = $1
method var = $2
method bind_to = $3
method kwd_in = $4
method expr = $5
method block = $6
end
in For (ForCollect {region; value})
}
step_clause:
Step expr { $1,$2 }
arrow_clause:
ARROW var { $1,$2 }
(* Expressions *)
expr:
expr OR conj_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Or {region; value = $1,$2,$3}
}
| conj_expr { $1 }
conj_expr:
conj_expr AND comp_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
And {region; value = $1,$2,$3}
}
| comp_expr { $1 }
comp_expr:
comp_expr LT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Lt {region; value = $1,$2,$3}
}
| comp_expr LEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Leq {region; value = $1,$2,$3}
}
| comp_expr GT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Gt {region; value = $1,$2,$3}
}
| comp_expr GEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Geq {region; value = $1,$2,$3}
}
| comp_expr EQUAL cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Equal {region; value = $1,$2,$3}
}
| comp_expr NEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Neq {region; value = $1,$2,$3}
}
| cat_expr { $1 }
cat_expr:
cons_expr CAT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Cat {region; value = $1,$2,$3}
}
| cons_expr { $1 }
cons_expr:
add_expr CONS cons_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Cons {region; value = $1,$2,$3}
}
| add_expr { $1 }
add_expr:
add_expr PLUS mult_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Add {region; value = $1,$2,$3}
}
| add_expr MINUS mult_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Sub {region; value = $1,$2,$3}
}
| mult_expr { $1 }
mult_expr:
mult_expr TIMES unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Mult {region; value = $1,$2,$3}
}
| mult_expr SLASH unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Div {region; value = $1,$2,$3}
}
| mult_expr Mod unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Mod {region; value = $1,$2,$3}
}
| unary_expr { $1 }
unary_expr:
MINUS core_expr {
let stop = expr_to_region $2 in
let region = cover $1 stop in
Neg {region; value = $1,$2}
}
| Not core_expr {
let stop = expr_to_region $2 in
let region = cover $1 stop in
Not {region; value = $1,$2}
}
| core_expr { $1 }
core_expr:
Int { Int $1 }
| var { Var $1 }
| String { String $1 }
| Bytes { Bytes $1 }
| C_False { False $1 }
| C_True { True $1 }
| C_Unit { Unit $1 }
| tuple { Tuple $1 }
| list_expr { List $1 }
| empty_list { EmptyList $1 }
| set_expr { Set $1 }
| empty_set { EmptySet $1 }
| none_expr { NoneExpr $1 }
| fun_call { FunCall $1 }
| Constr arguments {
let region = cover $1.region $2.region in
ConstrApp {region; value = $1,$2}
}
| C_Some arguments {
let region = cover $1 $2.region in
SomeApp {region; value = $1,$2}
}
| map_name DOT brackets(expr) {
let region = cover $1.region $3.region in
let value =
object
method map_name = $1
method selector = $2
method index = $3
end
in MapLookUp {region; value}
}
fun_call:
fun_name arguments {
let region = cover $1.region $2.region
in {region; value = $1,$2}
}
tuple:
par(nsepseq(expr,COMMA)) { $1 }
arguments:
tuple { $1 }
list_expr:
brackets(nsepseq(expr,COMMA)) { $1 }
empty_list:
par(LBRACKET RBRACKET COLON type_expr { $1,$2,$3,$4 }) { $1 }
set_expr:
braces(nsepseq(expr,COMMA)) { $1 }
empty_set:
par(LBRACE RBRACE COLON type_expr { $1,$2,$3,$4 }) { $1 }
none_expr:
par(C_None COLON type_expr { $1,$2,$3 }) { $1 }
(* Patterns *)
pattern:
nsepseq(core_pattern,CONS) {
let region = nsepseq_to_region core_pattern_to_region $1
in {region; value=$1}
}
core_pattern:
var { PVar $1 }
| WILD { PWild $1 }
| Int { PInt $1 }
| String { PString $1 }
| C_Unit { PUnit $1 }
| C_False { PFalse $1 }
| C_True { PTrue $1 }
| C_None { PNone $1 }
| list_patt { PList $1 }
| tuple_patt { PTuple $1 }
| C_Some par(core_pattern) {
let region = cover $1 $2.region
in PSome {region; value = $1,$2}
}
list_patt:
brackets(sepseq(core_pattern,COMMA)) { Sugar $1 }
| par(cons_pattern) { Raw $1 }
cons_pattern:
core_pattern CONS pattern { $1,$2,$3 }
tuple_patt:
par(nsepseq(core_pattern,COMMA)) { $1 }

71
ParserMain.ml Normal file
View File

@ -0,0 +1,71 @@
(* Driver for the parser of Ligo *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
let sprintf = Printf.sprintf
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
type Error.t += ParseError
let error_to_string = function
ParseError -> "Syntax error.\n"
| _ -> assert false
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg)
(* Path to the Ligo standard library *)
(*
let lib_path =
match EvalOpt.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
*)
(* Instantiating the lexer *)
module Lexer = Lexer.Make (LexToken)
let Lexer.{read; buffer; get_pos; get_last; close} =
Lexer.open_token_stream EvalOpt.input
and cout = stdout
let log = Lexer.output_token ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.cmd cout
and close_all () = close (); close_out cout
(* Tokeniser *)
let tokeniser = read ~log
(* Main *)
let () =
try
let ast = Parser.program tokeniser buffer in
if Utils.String.Set.mem "parser" EvalOpt.verbose
then AST.print_tokens ast
with
Lexer.Error err ->
close_all ();
Lexer.print_error ~offsets EvalOpt.mode err
| Parser.Error ->
let region = get_last () in
let error = Region.{region; value=ParseError} in
let () = close_all () in
print_error ~offsets EvalOpt.mode error
| Sys_error msg -> Utils.highlight msg

124
Pos.ml Normal file
View File

@ -0,0 +1,124 @@
type t = <
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
set_file : string -> t;
set_line : int -> t;
new_line : string -> t;
add_nl : t;
shift_bytes : int -> t;
shift_one_uchar : int -> t;
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
is_ghost : bool;
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
let sprintf = Printf.sprintf
let make ~byte ~point_num ~point_bol =
let () = assert (point_num >= point_bol) in
object (self)
val byte = byte
method byte = byte
val point_num = point_num
method point_num = point_num
val point_bol = point_bol
method point_bol = point_bol
method set_file file = {< byte = Lexing.{byte with pos_fname = file} >}
method set_line line = {< byte = Lexing.{byte with pos_lnum = line} >}
(* The string must not contain '\n'. See [new_line]. *)
method shift_bytes len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + len >}
method shift_one_uchar len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + 1 >}
method add_nl =
{< byte = Lexing.{byte with
pos_lnum = byte.pos_lnum + 1;
pos_bol = byte.pos_cnum};
point_bol = point_num >}
method new_line string =
let len = String.length string
in (self#shift_bytes len)#add_nl
method is_ghost = byte = Lexing.dummy_pos
method file = byte.Lexing.pos_fname
method line = byte.Lexing.pos_lnum
method offset = function
`Byte -> Lexing.(byte.pos_cnum - byte.pos_bol)
| `Point -> point_num - point_bol
method column mode = 1 + self#offset mode
method line_offset = function
`Byte -> byte.Lexing.pos_bol
| `Point -> point_bol
method byte_offset = byte.Lexing.pos_cnum
method to_string ?(offsets=true) mode =
let offset = self#offset mode in
let horizontal, value =
if offsets then "character", offset else "column", offset + 1
in sprintf "File \"%s\", line %i, %s %i"
self#file self#line horizontal value
method compact ?(offsets=true) mode =
if self#is_ghost then "ghost"
else
let offset = self#offset mode in
sprintf "%s:%i:%i"
self#file self#line (if offsets then offset else offset + 1)
method anonymous ?(offsets=true) mode =
if self#is_ghost then "ghost"
else sprintf "%i:%i" self#line
(if offsets then self#offset mode else self#column mode)
end
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1)
let min =
let byte = Lexing.{
pos_fname = "";
pos_lnum = 1;
pos_bol = 0;
pos_cnum = 0}
in make ~byte ~point_num:0 ~point_bol:0
(* Comparisons *)
let equal pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset = pos2#byte_offset
let lt pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset < pos2#byte_offset

105
Pos.mli Normal file
View File

@ -0,0 +1,105 @@
(* Positions in a file
A position in a file denotes a single unit belonging to it, for
example, in an ASCII text file, it is a particular character within
that file (the unit is the byte in this instance, since in ASCII
one character is encoded with one byte).
Units can be either bytes (as ASCII characters) or, more
generally, unicode points.
The type for positions is the object type [t].
We use here lexing positions to denote byte-oriented positions
(field [byte]), and we manage code points by means of the fields
[point_num] and [point_bol]. These two fields have a meaning
similar to the fields [pos_cnum] and [pos_bol], respectively, from
the standard module [Lexing]. That is to say, [point_num] holds the
number of code points since the beginning of the file, and
[point_bol] the number of code points since the beginning of the
current line.
The name of the file is given by the field [file], and the line
number by the field [line].
*)
type t = <
(* Payload *)
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
(* Setters *)
set_file : string -> t;
set_line : int -> t;
(* The call [pos#new_line s], where the string [s] is either "\n" or
"\c\r", updates the position [pos] with a new line. *)
new_line : string -> t;
add_nl : t;
(* The call [pos#shift_bytes n] evaluates in a position that is the
translation of position [pos] of [n] bytes forward in the
file. The call [pos#shift_one_uchar n] is similar, except that it
assumes that [n] is the number of bytes making up one unicode
point. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
(* The call [pos#offset `Byte] provides the horizontal offset of the
position [pos] in bytes. (An offset is the number of units, like
bytes, since the beginning of the current line.) The call
[pos#offset `Point] is the offset counted in number of unicode
points.
The calls to the method [column] are similar to those to
[offset], except that they give the current column number.
The call [pos#line_offset `Byte] is the offset of the line of
position [pos], counted in bytes. Dually, [pos#line_offset
`Point] counts the same offset in code points.
The call [pos#byte_offset] is the offset of the position [pos]
since the beginning of the file, counted in bytes.
*)
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
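(* For example (illustrative): on a line of pure ASCII text, a
   position at the fourth character of the line has
   [offset `Byte = 3] and [column `Byte = 4], columns being
   1-based. *)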
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
val make : byte:Lexing.position -> point_num:int -> point_bol:int -> t
(* Special positions *)
val ghost : t (* Same as [Lexing.dummy_pos] *)
val min : t (* Lexing convention: line 1, offsets to 0 and file to "". *)
(* Comparisons *)
val equal : t -> t -> bool
val lt : t -> t -> bool

122
Region.ml Normal file
View File

@ -0,0 +1,122 @@
(* Regions of a file *)
let sprintf = Printf.sprintf
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
type region = t
type 'a reg = {region: t; value: 'a}
(* Injections *)
exception Invalid
let make ~(start: Pos.t) ~(stop: Pos.t) =
if start#file <> stop#file || start#byte_offset > stop#byte_offset
then raise Invalid
else
object
val start = start
method start = start
val stop = stop
method stop = stop
method shift_bytes len =
let start = start#shift_bytes len
and stop = stop#shift_bytes len
in {< start = start; stop = stop >}
method shift_one_uchar len =
let start = start#shift_one_uchar len
and stop = stop#shift_one_uchar len
in {< start = start; stop = stop >}
(* Getters *)
method file = start#file
method pos = start, stop
method byte_pos = start#byte, stop#byte
(* Predicates *)
method is_ghost = start#is_ghost && stop#is_ghost
(* Conversions to strings *)
method to_string ?(file=true) ?(offsets=true) mode =
let horizontal = if offsets then "character" else "column"
and start_offset =
if offsets then start#offset mode else start#column mode
and stop_offset =
if offsets then stop#offset mode else stop#column mode in
let info =
if file
then sprintf "in file \"%s\", line %i, %s"
(String.escaped start#file) start#line horizontal
else sprintf "at line %i, %s" start#line horizontal
in if stop#line = start#line
then sprintf "%ss %i-%i" info start_offset stop_offset
else sprintf "%s %i to line %i, %s %i"
info start_offset stop#line horizontal stop_offset
method compact ?(file=true) ?(offsets=true) mode =
let start_str = start#anonymous ~offsets mode
and stop_str = stop#anonymous ~offsets mode in
if start#file = stop#file then
if file then sprintf "%s:%s-%s" start#file start_str stop_str
else sprintf "%s-%s" start_str stop_str
else sprintf "%s:%s-%s:%s" start#file start_str stop#file stop_str
end
(* Special regions *)
let ghost = make ~start:Pos.ghost ~stop:Pos.ghost
let min = make ~start:Pos.min ~stop:Pos.min
(* Comparisons *)
let equal r1 r2 =
r1#file = r2#file
&& Pos.equal r1#start r2#start
&& Pos.equal r1#stop r2#stop
let lt r1 r2 =
r1#file = r2#file
&& not r1#is_ghost
&& not r2#is_ghost
&& Pos.lt r1#start r2#start
&& Pos.lt r1#stop r2#stop
let cover r1 r2 =
if r1#is_ghost
then r2
else if r2#is_ghost
then r1
else if lt r1 r2
then make ~start:r1#start ~stop:r2#stop
else make ~start:r2#start ~stop:r1#stop

123
Region.mli Normal file
View File

@ -0,0 +1,123 @@
(* Regions of a file
A _region_ is a contiguous series of bytes, for example, in a text
file. It is here denoted by the object type [t].
The start (included) of the region is given by the field [start],
which is a _position_, and the end (excluded) is the position given
by the field [stop]. The convention of including the start and
excluding the end makes it possible to have empty regions if, and only if,
[start = stop]. See module [Pos] for the definition of positions.
The first byte of a file starts at the offset zero (that is,
column one), and [start] is always lower than or equal to [stop],
and they must refer to the same file.
*)
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
(* The call [region#shift_bytes n] evaluates in a region that is the
translation of region [region] of [n] bytes forward in the
file. The call [region#shift_one_uchar n] is similar, except that
it assumes that [n] is the number of bytes making up one unicode
point. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
(* The method [file] returns the file name.
The method [pos] returns the values of the fields [start] and [stop].
The method [byte_pos] returns the start and end positions of the
region at hand _interpreting them as lexing positions_, that is,
the unit is the byte. *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
(* The call [region#to_string ~file ~offsets mode] evaluates in a
string denoting the region [region].
The name of the file is present if, and only if, [file = true] or
[file] is missing.
The positions in the file are expressed as horizontal offsets if
[offsets = true] or [offsets] is missing (the default), otherwise
as columns.
If [mode = `Byte], those positions will be assumed to have bytes
as their unit, otherwise, if [mode = `Point], they will be
assumed to refer to code points.
The method [compact] has the same signature and calling
convention as [to_string], except that the resulting string is
more compact.
*)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
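(* For example (illustrative): a region spanning characters 4 to 7
   of line 3 in file "a.li" is rendered by [to_string] as
   "in file \"a.li\", line 3, characters 4-7" and by [compact] as
   "a.li:3:4-3:7". *)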
type region = t
type 'a reg = {region: t; value: 'a}
(* Constructors *)
(* The function [make] creates a region from two positions. If the
positions are not properly ordered or refer to different files, the
exception [Invalid] is raised. *)
exception Invalid
val make : start:Pos.t -> stop:Pos.t -> t
(* Special regions *)
(* To deal with ghost expressions, that is, pieces of abstract syntax
that have not been built from excerpts of concrete syntax, we need
_ghost regions_. The module [Pos] provides a [ghost] position, and
we also provide a [ghost] region and, in type [t], the method
[is_ghost] to check it. *)
val ghost : t (* Two [Pos.ghost] positions *)
(* Occasionally, we may need a minimum region. It is here made of two
minimal positions. *)
val min : t (* Two [Pos.min] positions *)
(* Comparisons *)
(* Two regions are equal if, and only if, they refer to the same file
and their start positions are equal and their stop positions are
equal. See [Pos.equal]. Note that [r1] and [r2] can be ghosts. *)
val equal : t -> t -> bool
(* The call [lt r1 r2] ("lower than") has the value [true] if, and
only if, regions [r1] and [r2] refer to the same file, none is a
ghost and the start position of [r1] is lower than that of
[r2]. (See [Pos.lt].) *)
val lt : t -> t -> bool
(* Given two regions [r1] and [r2], we may want the region [cover r1
r2] that covers [r1] and [r2]. We have the property [equal (cover
r1 r2) (cover r2 r1)]. (In a sense, it is the maximum region, but
we avoid that name because of the [min] value above.) If [r1] is a
ghost, the cover is [r2], and if [r2] is a ghost, the cover is [r1]. *)
val cover : t -> t -> t

27
Tests/a.li Normal file

@ -0,0 +1,27 @@
type t is int * string
type u is t
type v is record foo: key; bar: mutez; baz: address end
type w is K of v * u
parameter p : v
storage w
operations u
function f (const x : int) : int is
var y : int := 5 - x;
const z : int = 6
begin
y := x + y
end with y * 2
procedure g (const l : list (int)) is
begin
match l with
[] -> null
| h<:t -> q (h+2)
end
end
begin
g (Unit)
end

157
Utils.ml Normal file

@ -0,0 +1,157 @@
(* Utility types and functions *)
(* Identity *)
let id x = x
(* Combinators *)
let (<@) f g x = f (g x)
let swap f x y = f y x
let lambda = fun x _ -> x
let curry f x y = f (x,y)
let uncurry f (x,y) = f x y
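(* For instance (illustrative comment, not in the original):
   [uncurry (+) (1,2) = 3], and
   [(string_of_int <@ succ) 41 = "42"]. *)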
(* Parametric rules for sequences *)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
let nseq_cons x (hd,tl) = x, hd::tl
let nsepseq_cons x sep (hd,tl) = x, (sep,hd)::tl
let sepseq_cons x sep = function
None -> x, []
| Some (hd,tl) -> x, (sep,hd)::tl
(* Rightwards iterators *)
let nseq_foldl f a (hd,tl) = List.fold_left f a (hd::tl)
let nsepseq_foldl f a (hd,tl) =
List.fold_left (fun a (_,e) -> f a e) (f a hd) tl
let sepseq_foldl f a = function
None -> a
| Some s -> nsepseq_foldl f a s
let nseq_iter f (hd,tl) = List.iter f (hd::tl)
let nsepseq_iter f (hd,tl) = f hd; List.iter (f <@ snd) tl
let sepseq_iter f = function
None -> ()
| Some s -> nsepseq_iter f s
(* Reversing *)
let nseq_rev (hd,tl) =
let rec aux acc = function
[] -> acc
| x::l -> aux (nseq_cons x acc) l
in aux (hd,[]) tl
let nsepseq_rev =
let rec aux acc = function
hd, (sep,snd)::tl -> aux ((sep,hd)::acc) (snd,tl)
| hd, [] -> hd, acc in
function
hd, (sep,snd)::tl -> aux [sep,hd] (snd,tl)
| s -> s
let sepseq_rev = function
None -> None
| Some seq -> Some (nsepseq_rev seq)
(* Leftwards iterators *)
let nseq_foldr f (hd,tl) = List.fold_right f (hd::tl)
let nsepseq_foldr f (hd,tl) a = f hd (List.fold_right (f <@ snd) tl a)
let sepseq_foldr f = function
None -> fun a -> a
| Some s -> nsepseq_foldr f s
(* Conversions to lists *)
let nseq_to_list (x,y) = x::y
let nsepseq_to_list (x,y) = x :: List.map snd y
let sepseq_to_list = function
None -> []
| Some s -> nsepseq_to_list s
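(* An illustrative sketch (not in the original): the sequence
   "1; 2; 3" as an [(int, string) nsepseq], exercising consing,
   reversal and conversion to a list. *)

let _seq : (int, string) nsepseq = 1, ["; ", 2; "; ", 3]
let _ = assert (nsepseq_to_list _seq = [1; 2; 3])
let _ = assert (nsepseq_rev _seq = (3, ["; ", 2; "; ", 1]))
let _ = assert (nsepseq_to_list (nsepseq_cons 0 "; " _seq) = [0; 1; 2; 3])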
(* Optional values *)
module Option =
struct
let apply f x =
match x with
Some y -> Some (f y)
| None -> None
let rev_apply x y =
match x with
Some f -> f y
| None -> y
let to_string = function
Some x -> x
| None -> ""
end
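(* Quick illustration of [Option] (added, not in the original): *)

let _ = assert (Option.apply succ (Some 41) = Some 42)
let _ = assert (Option.rev_apply (Some succ) 41 = 42)
let _ = assert (Option.to_string None = "")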
(* Modules based on [String], like sets and maps. *)
module String =
struct
include String
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Integers *)
module Int =
struct
type t = int
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Effectful symbol generator *)
let gen_sym =
let counter = ref 0 in
fun () -> incr counter; "v" ^ string_of_int !counter
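(* For instance (illustrative): starting from a fresh counter, the
   first two calls yield "v1", then "v2". *)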
(* General tracing function *)
let trace text = function
None -> ()
| Some chan -> output_string chan text; flush chan
(* Printing a string in red to standard error *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg

97
Utils.mli Normal file

@ -0,0 +1,97 @@
(* Utility types and functions *)
(* Polymorphic identity function *)
val id : 'a -> 'a
(* Combinators *)
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
val lambda : 'a -> 'b -> 'a
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
(* Parametric rules for sequences
nseq: non-empty sequence;
sepseq: (possibly empty) sequence of separated items;
nsepseq: non-empty sequence of separated items.
*)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
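(* For example (illustrative comment, not in the original
   interface): [(1, ["+", 2; "+", 3])] is an [(int, string) nsepseq]
   denoting "1 + 2 + 3"; [None] is the empty [(int, string) sepseq]. *)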
(* Consing *)
val nseq_cons : 'a -> 'a nseq -> 'a nseq
val nsepseq_cons : 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_cons : 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
(* Reversing *)
val nseq_rev: 'a nseq -> 'a nseq
val nsepseq_rev: ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_rev: ('a,'sep) sepseq -> ('a,'sep) sepseq
(* Rightwards iterators *)
val nseq_foldl : ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
val nsepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
val sepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
val nseq_iter : ('a -> unit) -> 'a nseq -> unit
val nsepseq_iter : ('a -> unit) -> ('a,'b) nsepseq -> unit
val sepseq_iter : ('a -> unit) -> ('a,'b) sepseq -> unit
(* Leftwards iterators *)
val nseq_foldr : ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
val nsepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
val sepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
(* Conversions to lists *)
val nseq_to_list : 'a nseq -> 'a list
val nsepseq_to_list : ('a,'b) nsepseq -> 'a list
val sepseq_to_list : ('a,'b) sepseq -> 'a list
(* Effectful symbol generator *)
val gen_sym : unit -> string
(* General tracing function *)
val trace : string -> out_channel option -> unit
(* Printing a string in red to standard error *)
val highlight : string -> unit
(* Working with optional values *)
module Option :
sig
val apply : ('a -> 'b) -> 'a option -> 'b option
val rev_apply : ('a -> 'a) option -> 'a -> 'a
val to_string : string option -> string
end
(* An extension to the standard module [String] *)
module String :
sig
include module type of String
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end
(* Integer maps and sets *)
module Int :
sig
type t = int
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end

1
Version.ml Normal file

@ -0,0 +1 @@
let version = "7445e9c0"

38
dune Normal file

@ -0,0 +1,38 @@
(ocamllex LexToken)
(ocamllex Lexer)
(menhir
(merge_into Parser)
(modules ParToken Parser)
(flags -la 1 --explain --external-tokens LexToken)
)
(executables
(names LexerMain ParserMain)
(public_names ligo-lexer ligo-parser)
(package ligo-parser)
(modules_without_implementation
Error
)
(libraries
hex
zarith
getopt
uutf
str
)
)
(rule
(targets Parser.exe)
(deps ParserMain.exe)
(action (copy ParserMain.exe Parser.exe))
(mode promote-until-clean)
)
(rule
(targets Lexer.exe)
(deps LexerMain.exe)
(action (copy LexerMain.exe Lexer.exe))
(mode promote-until-clean)
)

2
dune-project Normal file

@ -0,0 +1,2 @@
(lang dune 1.7)
(using menhir 2.0)

19
ligo-parser.opam Normal file

@ -0,0 +1,19 @@
opam-version: "2.0"
version: "1.0"
maintainer: "gabriel.alfour@gmail.com"
authors: [ "Galfour" ]
homepage: "https://gitlab.com/gabriel.alfour/ligo-parser"
bug-reports: "https://gitlab.com/gabriel.alfour/ligo-parser/issues"
dev-repo: "git+https://gitlab.com/gabriel.alfour/ligo-parser.git"
license: "MIT"
depends: [
"dune"
"menhir"
"hex" "zarith" "getopt" "uutf"
]
build: [
[ "dune" "build" "-p" name "-j" jobs ]
]
url {
src: "https://gitlab.com/gabriel.alfour/ligo-parser/-/archive/master/ligo-parser.tar.gz"
}