Removed manual copy of ligo-parser prior to merging Christian's history

This commit is contained in:
Georges Dupéron 2019-03-27 11:13:15 +01:00
parent 89a7821e87
commit ea358f7101
37 changed files with 0 additions and 6377 deletions

View File

@ -1 +0,0 @@
ocamlc: -w -42

View File

@ -1 +0,0 @@
--explain --external-tokens LexToken --base Parser ParToken.mly

View File

@ -1 +0,0 @@
/Version.ml

View File

@ -1,2 +0,0 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg

File diff suppressed because it is too large Load Diff

View File

@ -1,604 +0,0 @@
(* Abstract Syntax Tree (AST) for LIGO *)
[@@@warning "-30"]
open Utils
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
type 'a reg = 'a Region.reg
val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t
val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t
val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t
(* Keywords of LIGO *)
type kwd_begin = Region.t
type kwd_case = Region.t
type kwd_const = Region.t
type kwd_down = Region.t
type kwd_else = Region.t
type kwd_end = Region.t
type kwd_entrypoint = Region.t
type kwd_fail = Region.t
type kwd_for = Region.t
type kwd_function = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_is = Region.t
type kwd_map = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_of = Region.t
type kwd_patch = Region.t
type kwd_procedure = Region.t
type kwd_record = Region.t
type kwd_skip = Region.t
type kwd_step = Region.t
type kwd_storage = Region.t
type kwd_then = Region.t
type kwd_to = Region.t
type kwd_type = Region.t
type kwd_var = Region.t
type kwd_while = Region.t
type kwd_with = Region.t
(* Data constructors *)
type c_False = Region.t
type c_None = Region.t
type c_Some = Region.t
type c_True = Region.t
type c_Unit = Region.t
(* Symbols *)
type semi = Region.t (* ";" *)
type comma = Region.t (* "," *)
type lpar = Region.t (* "(" *)
type rpar = Region.t (* ")" *)
type lbrace = Region.t (* "{" *)
type rbrace = Region.t (* "}" *)
type lbracket = Region.t (* "[" *)
type rbracket = Region.t (* "]" *)
type cons = Region.t (* "#" *)
type vbar = Region.t (* "|" *)
type arrow = Region.t (* "->" *)
type assign = Region.t (* ":=" *)
type equal = Region.t (* "=" *)
type colon = Region.t (* ":" *)
type bool_or = Region.t (* "||" *)
type bool_and = Region.t (* "&&" *)
type lt = Region.t (* "<" *)
type leq = Region.t (* "<=" *)
type gt = Region.t (* ">" *)
type geq = Region.t (* ">=" *)
type neq = Region.t (* "=/=" *)
type plus = Region.t (* "+" *)
type minus = Region.t (* "-" *)
type slash = Region.t (* "/" *)
type times = Region.t (* "*" *)
type dot = Region.t (* "." *)
type wild = Region.t (* "_" *)
type cat = Region.t (* "^" *)
(* Virtual tokens *)
type eof = Region.t
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type map_name = string reg
type constr = string reg
(* Parentheses *)
type 'a par = {
lpar : lpar;
inside : 'a;
rpar : rpar
}
(* Brackets compounds *)
type 'a brackets = {
lbracket : lbracket;
inside : 'a;
rbracket : rbracket
}
(* Braced compounds *)
type 'a braces = {
lbrace : lbrace;
inside : 'a;
rbrace : rbrace
}
(* The Abstract Syntax Tree *)
type t = {
decl : declaration nseq;
eof : eof
}
and ast = t
and declaration =
TypeDecl of type_decl reg
| ConstDecl of const_decl reg
| LambdaDecl of lambda_decl
and const_decl = {
kwd_const : kwd_const;
name : variable;
colon : colon;
const_type : type_expr;
equal : equal;
init : expr;
terminator : semi option
}
(* Type declarations *)
and type_decl = {
kwd_type : kwd_type;
name : type_name;
kwd_is : kwd_is;
type_expr : type_expr;
terminator : semi option
}
and type_expr =
TProd of cartesian
| TSum of (variant reg, vbar) nsepseq reg
| TRecord of record_type reg
| TApp of (type_name * type_tuple) reg
| TPar of type_expr par reg
| TAlias of variable
and cartesian = (type_expr, times) nsepseq reg
and variant = {
constr : constr;
kwd_of : kwd_of;
product : cartesian
}
and record_type = {
kwd_record : kwd_record;
fields : field_decls;
kwd_end : kwd_end
}
and field_decls = (field_decl reg, semi) nsepseq
and field_decl = {
field_name : field_name;
colon : colon;
field_type : type_expr
}
and type_tuple = (type_expr, comma) nsepseq par reg
(* Function and procedure declarations *)
and lambda_decl =
FunDecl of fun_decl reg
| ProcDecl of proc_decl reg
| EntryDecl of entry_decl reg
and fun_decl = {
kwd_function : kwd_function;
name : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
local_decls : local_decl list;
block : block reg;
kwd_with : kwd_with;
return : expr;
terminator : semi option
}
and proc_decl = {
kwd_procedure : kwd_procedure;
name : variable;
param : parameters;
kwd_is : kwd_is;
local_decls : local_decl list;
block : block reg;
terminator : semi option
}
and entry_decl = {
kwd_entrypoint : kwd_entrypoint;
name : variable;
param : entry_params;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
local_decls : local_decl list;
block : block reg;
kwd_with : kwd_with;
return : expr;
terminator : semi option
}
and parameters = (param_decl, semi) nsepseq par reg
and entry_params = (entry_param_decl, semi) nsepseq par reg
and entry_param_decl =
EntryConst of param_const reg
| EntryVar of param_var reg
| EntryStore of storage reg
and storage = {
kwd_storage : kwd_storage;
var : variable;
colon : colon;
storage_type : type_expr
}
and param_decl =
ParamConst of param_const reg
| ParamVar of param_var reg
and param_const = {
kwd_const : kwd_const;
var : variable;
colon : colon;
param_type : type_expr
}
and param_var = {
kwd_var : kwd_var;
var : variable;
colon : colon;
param_type : type_expr
}
and block = {
opening : kwd_begin;
instr : instructions;
terminator : semi option;
close : kwd_end
}
and local_decl =
LocalLam of lambda_decl
| LocalConst of const_decl reg
| LocalVar of var_decl reg
and var_decl = {
kwd_var : kwd_var;
name : variable;
colon : colon;
var_type : type_expr;
assign : assign;
init : expr;
terminator : semi option
}
and instructions = (instruction, semi) nsepseq
and instruction =
Single of single_instr
| Block of block reg
and single_instr =
Cond of conditional reg
| Case of case_instr reg
| Assign of assignment reg
| Loop of loop
| ProcCall of fun_call
| Fail of fail_instr reg
| Skip of kwd_skip
| RecordPatch of record_patch reg
| MapPatch of map_patch reg
and map_patch = {
kwd_patch : kwd_patch;
path : path;
kwd_with : kwd_with;
map_inj : map_injection reg
}
and map_injection = {
opening : kwd_map;
bindings : (binding reg, semi) nsepseq;
terminator : semi option;
close : kwd_end
}
and binding = {
source : expr;
arrow : arrow;
image : expr
}
and record_patch = {
kwd_patch : kwd_patch;
path : path;
kwd_with : kwd_with;
record_inj : record_injection reg
}
and fail_instr = {
kwd_fail : kwd_fail;
fail_expr : expr
}
and conditional = {
kwd_if : kwd_if;
test : expr;
kwd_then : kwd_then;
ifso : instruction;
kwd_else : kwd_else;
ifnot : instruction
}
and case_instr = {
kwd_case : kwd_case;
expr : expr;
kwd_of : kwd_of;
lead_vbar : vbar option;
cases : cases;
kwd_end : kwd_end
}
and cases = (case reg, vbar) nsepseq reg
and case = {
pattern : pattern;
arrow : arrow;
instr : instruction
}
and assignment = {
lhs : lhs;
assign : assign;
rhs : rhs;
}
and lhs =
Path of path
| MapPath of map_lookup reg
and rhs =
Expr of expr
| NoneExpr of c_None
and loop =
While of while_loop reg
| For of for_loop
and while_loop = {
kwd_while : kwd_while;
cond : expr;
block : block reg
}
and for_loop =
ForInt of for_int reg
| ForCollect of for_collect reg
and for_int = {
kwd_for : kwd_for;
assign : var_assign reg;
down : kwd_down option;
kwd_to : kwd_to;
bound : expr;
step : (kwd_step * expr) option;
block : block reg
}
and var_assign = {
name : variable;
assign : assign;
expr : expr
}
and for_collect = {
kwd_for : kwd_for;
var : variable;
bind_to : (arrow * variable) option;
kwd_in : kwd_in;
expr : expr;
block : block reg
}
(* Expressions *)
and expr =
ELogic of logic_expr
| EArith of arith_expr
| EString of string_expr
| EList of list_expr
| ESet of set_expr
| EConstr of constr_expr
| ERecord of record_expr
| EMap of map_expr
| EVar of Lexer.lexeme reg
| ECall of fun_call
| EBytes of (Lexer.lexeme * Hex.t) reg
| EUnit of c_Unit
| ETuple of tuple
| EPar of expr par reg
and map_expr =
MapLookUp of map_lookup reg
| MapInj of map_injection reg
and map_lookup = {
path : path;
index : expr brackets reg
}
and path =
Name of variable
| RecordPath of record_projection reg
and logic_expr =
BoolExpr of bool_expr
| CompExpr of comp_expr
and bool_expr =
Or of bool_or bin_op reg
| And of bool_and bin_op reg
| Not of kwd_not un_op reg
| False of c_False
| True of c_True
and 'a bin_op = {
op : 'a;
arg1 : expr;
arg2 : expr
}
and 'a un_op = {
op : 'a;
arg : expr
}
and comp_expr =
Lt of lt bin_op reg
| Leq of leq bin_op reg
| Gt of gt bin_op reg
| Geq of geq bin_op reg
| Equal of equal bin_op reg
| Neq of neq bin_op reg
and arith_expr =
Add of plus bin_op reg
| Sub of minus bin_op reg
| Mult of times bin_op reg
| Div of slash bin_op reg
| Mod of kwd_mod bin_op reg
| Neg of minus un_op reg
| Int of (Lexer.lexeme * Z.t) reg
and string_expr =
Cat of cat bin_op reg
| String of Lexer.lexeme reg
and list_expr =
Cons of cons bin_op reg
| List of (expr, comma) nsepseq brackets reg
| EmptyList of empty_list reg
and set_expr =
Set of (expr, comma) nsepseq braces reg
| EmptySet of empty_set reg
and constr_expr =
SomeApp of (c_Some * arguments) reg
| NoneExpr of none_expr reg
| ConstrApp of (constr * arguments) reg
and record_expr =
RecordInj of record_injection reg
| RecordProj of record_projection reg
and record_injection = {
opening : kwd_record;
fields : (field_assign reg, semi) nsepseq;
terminator : semi option;
close : kwd_end
}
and field_assign = {
field_name : field_name;
equal : equal;
field_expr : expr
}
and record_projection = {
record_name : variable;
selector : dot;
field_path : (field_name, dot) nsepseq
}
and tuple = (expr, comma) nsepseq par reg
and empty_list = typed_empty_list par
and typed_empty_list = {
lbracket : lbracket;
rbracket : rbracket;
colon : colon;
list_type : type_expr
}
and empty_set = typed_empty_set par
and typed_empty_set = {
lbrace : lbrace;
rbrace : rbrace;
colon : colon;
set_type : type_expr
}
and none_expr = typed_none_expr par
and typed_none_expr = {
c_None : c_None;
colon : colon;
opt_type : type_expr
}
and fun_call = (fun_name * arguments) reg
and arguments = tuple
(* Patterns *)
and pattern =
PCons of (pattern, cons) nsepseq reg
| PVar of Lexer.lexeme reg
| PWild of wild
| PInt of (Lexer.lexeme * Z.t) reg
| PBytes of (Lexer.lexeme * Hex.t) reg
| PString of Lexer.lexeme reg
| PUnit of c_Unit
| PFalse of c_False
| PTrue of c_True
| PNone of c_None
| PSome of (c_Some * pattern par reg) reg
| PList of list_pattern
| PTuple of (pattern, comma) nsepseq par reg
and list_pattern =
Sugar of (pattern, comma) sepseq brackets reg
| Raw of (pattern * cons * pattern) par reg
(* Projecting regions *)
val type_expr_to_region : type_expr -> Region.t
val expr_to_region : expr -> Region.t
val instr_to_region : instruction -> Region.t
val pattern_to_region : pattern -> Region.t
val local_decl_to_region : local_decl -> Region.t
val path_to_region : path -> Region.t
val lhs_to_region : lhs -> Region.t
val rhs_to_region : rhs -> Region.t
(* Printing *)
val print_tokens : t -> unit

View File

@ -1,3 +0,0 @@
type t = ..
type error = t

View File

@ -1,161 +0,0 @@
(* Parsing the command-line option for testing the LIGO lexer and
parser *)
let printf = Printf.printf
let sprintf = Printf.sprintf
let abort msg =
Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1
(* Help *)
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.ligo | \"-\"]\n" file;
print_endline "where <input>.ligo is the LIGO source file (default: stdin),";
print_endline "and each <option> (if any) is one of the following:";
print_endline " -I <paths> Library paths (colon-separated)";
print_endline " -c, --copy Print lexemes of tokens and markup (lexer)";
print_endline " -t, --tokens Print tokens (lexer)";
print_endline " -u, --units Print tokens and markup (lexer)";
print_endline " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations";
print_endline " --verbose=<stages> cmdline, cpp, ast (colon-separated)";
print_endline " --version Commit hash on stdout";
print_endline " -h, --help This help";
exit 0
(* Version *)
let version () = printf "%s\n" Version.version; exit 0
(* Specifying the command-line options a la GNU *)
let copy = ref false
and tokens = ref false
and units = ref false
and quiet = ref false
and columns = ref false
and bytes = ref false
and verbose = ref Utils.String.Set.empty
and input = ref None
and libs = ref []
let split_at_colon = Str.(split (regexp ":"))
let add_path p = libs := !libs @ split_at_colon p
let add_verbose d =
verbose := List.fold_left (Utils.swap Utils.String.Set.add)
!verbose
(split_at_colon d)
let specs =
let open! Getopt in [
'I', nolong, None, Some add_path;
'c', "copy", set copy true, None;
't', "tokens", set tokens true, None;
'u', "units", set units true, None;
'q', "quiet", set quiet true, None;
noshort, "columns", set columns true, None;
noshort, "bytes", set bytes true, None;
noshort, "verbose", None, Some add_verbose;
'h', "help", Some help, None;
noshort, "version", Some version, None
]
;;
(* Handler of anonymous arguments *)
let anonymous arg =
match !input with
None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs")
;;
(* Parsing the command-line options *)
try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
;;
(* Checking options and exporting them as non-mutable values *)
type command = Quiet | Copy | Units | Tokens
let cmd =
match !quiet, !copy, !units, !tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
let string_of convert = function
None -> "None"
| Some s -> sprintf "Some %s" (convert s)
let string_of_path p =
let apply s a = if a = "" then s else s ^ ":" ^ a
in List.fold_right apply p ""
let quote s = sprintf "\"%s\"" s
let verbose_str =
let apply e a =
if a <> "" then sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () =
printf "COMMAND LINE\n";
printf "copy = %b\n" !copy;
printf "tokens = %b\n" !tokens;
printf "units = %b\n" !units;
printf "quiet = %b\n" !quiet;
printf "columns = %b\n" !columns;
printf "bytes = %b\n" !bytes;
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote !input);
printf "libs = %s\n" (string_of_path !libs)
;;
if Utils.String.Set.mem "cmdline" !verbose then print_opt ();;
let input =
match !input with
None | Some "-" -> !input
| Some file_path ->
if Filename.check_suffix file_path ".ligo"
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .ligo."
(* Exporting remaining options as non-mutable values *)
let copy = !copy
and tokens = !tokens
and units = !units
and quiet = !quiet
and offsets = not !columns
and mode = if !bytes then `Byte else `Point
and verbose = !verbose
and libs = !libs
;;
if Utils.String.Set.mem "cmdline" verbose then
begin
printf "\nEXPORTED COMMAND LINE\n";
printf "copy = %b\n" copy;
printf "tokens = %b\n" tokens;
printf "units = %b\n" units;
printf "quiet = %b\n" quiet;
printf "offsets = %b\n" offsets;
printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote input);
printf "I = %s\n" (string_of_path libs)
end
;;

View File

@ -1,46 +0,0 @@
(* Parsing the command-line option for testing the LIGO lexer and
parser *)
(* If the value [offsets] is [true], then the user requested that
messages about source positions and regions be expressed in terms
of horizontal offsets. *)
val offsets : bool
(* If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is unicode points. *)
val mode : [`Byte | `Point]
(* If the option [verbose] is set to a list of predefined stages of
the compiler chain, then more information may be displayed about
those stages. *)
val verbose : Utils.String.Set.t
(* If the value [input] is [None] or [Some "-"], the input is standard
input. If [Some f], then the input is the file whose name (file
path) is [f]. *)
val input : string option
(* Paths where to find LIGO files for inclusion *)
val libs : string list
(* If the value [cmd] is
* [Quiet], then no output from the lexer and parser should be
expected, safe error messages: this is the default value;
* [Copy], then lexemes of tokens and markup will be printed to
standard output, with the expectation of a perfect match with
the input file;
* [Units], then the tokens and markup will be printed to standard
output, that is, the abstract representation of the concrete
lexical syntax;
* [Tokens], then the tokens only will be printed.
*)
type command = Quiet | Copy | Units | Tokens
val cmd : command

View File

@ -1,19 +0,0 @@
(* Purely functional queues based on a pair of lists *)
type 'a t = {rear: 'a list; front: 'a list}
let empty = {rear=[]; front=[]}
let enq x q = {q with rear = x::q.rear}
let rec deq = function
{rear=[]; front= []} -> None
| {rear; front= []} -> deq {rear=[]; front = List.rev rear}
| {rear; front=x::f} -> Some ({rear; front=f}, x)
let rec peek = function
{rear=[]; front= []} -> None
| {rear; front= []} -> peek {rear=[]; front = List.rev rear}
| {rear=_; front=x::_} as q -> Some (q,x)
let is_empty q = (q = empty)

View File

@ -1,17 +0,0 @@
(* Purely functional queues *)
type 'a t
val empty : 'a t
val enq : 'a -> 'a t -> 'a t
val deq : 'a t -> ('a t * 'a) option
val is_empty : 'a t -> bool
(* The call [peek q] is [None] if the queue [q] is empty, and,
otherwise, is a pair made of a queue and the next item in it to be
dequeued. The returned queue contains the same items as [q], in the
same order, but more efficient, in general, to use in further
calls. *)
val peek : 'a t -> ('a t * 'a) option

View File

@ -1,151 +0,0 @@
(* This signature defines the lexical tokens for LIGO
_Tokens_ are the abstract units which are used by the parser to
build the abstract syntax tree (AST), in other words, the stream of
tokens is the minimal model of the input program, carrying
implicitly all its structure in a linear encoding, and nothing
else, in particular, comments and whitespace are absent.
A _lexeme_ is a specific character string (concrete
representation) denoting a token (abstract representation). Tokens
can be thought of as sets, and lexemes as elements of those sets --
there is often an infinite number of lexemes, but a small number of
tokens. (Think of identifiers as lexemes and one token.)
The tokens are qualified here as being "lexical" because the
parser generator Menhir expects to define them, in which context
they are called "parsing tokens", and they are made to match each
other. (This is an idiosyncratic terminology.)
The type of the lexical tokens is the variant [t], also
aliased to [token].
*)
type lexeme = string
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * Hex.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t (* ";" *)
| COMMA of Region.t (* "," *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| CONS of Region.t (* "#" *)
| VBAR of Region.t (* "|" *)
| ARROW of Region.t (* "->" *)
| ASS of Region.t (* ":=" *)
| EQUAL of Region.t (* "=" *)
| COLON of Region.t (* ":" *)
| LT of Region.t (* "<" *)
| LEQ of Region.t (* "<=" *)
| GT of Region.t (* ">" *)
| GEQ of Region.t (* ">=" *)
| NEQ of Region.t (* "=/=" *)
| PLUS of Region.t (* "+" *)
| MINUS of Region.t (* "-" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
| DOT of Region.t (* "." *)
| WILD of Region.t (* "_" *)
| CAT of Region.t (* "^" *)
(* Keywords *)
| And of Region.t (* "and" *)
| Begin of Region.t (* "begin" *)
| Case of Region.t (* "case" *)
| Const of Region.t (* "const" *)
| Down of Region.t (* "down" *)
| Else of Region.t (* "else" *)
| End of Region.t (* "end" *)
| Entrypoint of Region.t (* "entrypoint" *)
| Fail of Region.t (* "fail" *)
| For of Region.t (* "for" *)
| Function of Region.t (* "function" *)
| If of Region.t (* "if" *)
| In of Region.t (* "in" *)
| Is of Region.t (* "is" *)
| Map of Region.t (* "map" *)
| Mod of Region.t (* "mod" *)
| Not of Region.t (* "not" *)
| Of of Region.t (* "of" *)
| Or of Region.t (* "or" *)
| Patch of Region.t (* "patch" *)
| Procedure of Region.t (* "procedure" *)
| Record of Region.t (* "record" *)
| Skip of Region.t (* "skip" *)
| Step of Region.t (* "step" *)
| Storage of Region.t (* "storage" *)
| Then of Region.t (* "then" *)
| To of Region.t (* "to" *)
| Type of Region.t (* "type" *)
| Var of Region.t (* "var" *)
| While of Region.t (* "while" *)
| With of Region.t (* "with" *)
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
(* Projections
The difference between extracting the lexeme and a string from a
token is that the latter is the textual representation of the OCaml
value denoting the token (its abstract syntax), rather than its
lexeme (concrete syntax).
*)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
(* Injections *)
type int_err =
Non_canonical_zero
type ident_err = Reserved_name
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool

View File

@ -1,624 +0,0 @@
(* Lexer specification for LIGO, to be processed by [ocamllex] *)
{
(* START HEADER *)
(* Shorthands *)
type lexeme = string
let sprintf = Printf.sprintf
module SMap = Utils.String.Map
module SSet = Utils.String.Set
(* Hack to roll back one lexeme in the current semantic action *)
(*
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
*)
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * Hex.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t
| COMMA of Region.t
| LPAR of Region.t
| RPAR of Region.t
| LBRACE of Region.t
| RBRACE of Region.t
| LBRACKET of Region.t
| RBRACKET of Region.t
| CONS of Region.t
| VBAR of Region.t
| ARROW of Region.t
| ASS of Region.t
| EQUAL of Region.t
| COLON of Region.t
| LT of Region.t
| LEQ of Region.t
| GT of Region.t
| GEQ of Region.t
| NEQ of Region.t
| PLUS of Region.t
| MINUS of Region.t
| SLASH of Region.t
| TIMES of Region.t
| DOT of Region.t
| WILD of Region.t
| CAT of Region.t
(* Keywords *)
| And of Region.t (* "and" *)
| Begin of Region.t (* "begin" *)
| Case of Region.t (* "case" *)
| Const of Region.t (* "const" *)
| Down of Region.t (* "down" *)
| Else of Region.t (* "else" *)
| End of Region.t (* "end" *)
| Entrypoint of Region.t (* "entrypoint" *)
| Fail of Region.t (* "fail" *)
| For of Region.t (* "for" *)
| Function of Region.t (* "function" *)
| If of Region.t (* "if" *)
| In of Region.t (* "in" *)
| Is of Region.t (* "is" *)
| Map of Region.t (* "map" *)
| Mod of Region.t (* "mod" *)
| Not of Region.t (* "not" *)
| Of of Region.t (* "of" *)
| Or of Region.t (* "or" *)
| Patch of Region.t (* "patch" *)
| Procedure of Region.t (* "procedure" *)
| Record of Region.t (* "record" *)
| Skip of Region.t (* "skip" *)
| Step of Region.t (* "step" *)
| Storage of Region.t (* "storage" *)
| Then of Region.t (* "then" *)
| To of Region.t (* "to" *)
| Type of Region.t (* "type" *)
| Var of Region.t (* "var" *)
| While of Region.t (* "while" *)
| With of Region.t (* "with" *)
(* Types *)
(*
| T_address of Region.t (* "address" *)
| T_big_map of Region.t (* "big_map" *)
| T_bool of Region.t (* "bool" *)
| T_bytes of Region.t (* "bytes" *)
| T_contract of Region.t (* "contract" *)
| T_int of Region.t (* "int" *)
| T_key of Region.t (* "key" *)
| T_key_hash of Region.t (* "key_hash" *)
| T_list of Region.t (* "list" *)
| T_map of Region.t (* "map" *)
| T_mutez of Region.t (* "mutez" *)
| T_nat of Region.t (* "nat" *)
| T_operation of Region.t (* "operation" *)
| T_option of Region.t (* "option" *)
| T_set of Region.t (* "set" *)
| T_signature of Region.t (* "signature" *)
| T_string of Region.t (* "string" *)
| T_timestamp of Region.t (* "timestamp" *)
| T_unit of Region.t (* "unit" *)
*)
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
let proj_token = function
(* Literals *)
String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.to_string b)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
region, sprintf "Ident \"%s\"" value
| Constr Region.{region; value} ->
region, sprintf "Constr \"%s\"" value
(* Symbols *)
| SEMI region -> region, "SEMI"
| COMMA region -> region, "COMMA"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| CONS region -> region, "CONS"
| VBAR region -> region, "VBAR"
| ARROW region -> region, "ARROW"
| ASS region -> region, "ASS"
| EQUAL region -> region, "EQUAL"
| COLON region -> region, "COLON"
| LT region -> region, "LT"
| LEQ region -> region, "LEQ"
| GT region -> region, "GT"
| GEQ region -> region, "GEQ"
| NEQ region -> region, "NEQ"
| PLUS region -> region, "PLUS"
| MINUS region -> region, "MINUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| DOT region -> region, "DOT"
| WILD region -> region, "WILD"
| CAT region -> region, "CAT"
(* Keywords *)
| And region -> region, "And"
| Begin region -> region, "Begin"
| Case region -> region, "Case"
| Const region -> region, "Const"
| Down region -> region, "Down"
| Else region -> region, "Else"
| End region -> region, "End"
| Entrypoint region -> region, "Entrypoint"
| Fail region -> region, "Fail"
| For region -> region, "For"
| Function region -> region, "Function"
| If region -> region, "If"
| In region -> region, "In"
| Is region -> region, "Is"
| Map region -> region, "Map"
| Mod region -> region, "Mod"
| Not region -> region, "Not"
| Of region -> region, "Of"
| Or region -> region, "Or"
| Patch region -> region, "Patch"
| Procedure region -> region, "Procedure"
| Record region -> region, "Record"
| Skip region -> region, "Skip"
| Step region -> region, "Step"
| Storage region -> region, "Storage"
| Then region -> region, "Then"
| To region -> region, "To"
| Type region -> region, "Type"
| Var region -> region, "Var"
| While region -> region, "While"
| With region -> region, "With"
(* Data *)
| C_False region -> region, "C_False"
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
| C_True region -> region, "C_True"
| C_Unit region -> region, "C_Unit"
(* Virtual tokens *)
| EOF region -> region, "EOF"
let to_lexeme = function
(* Literals *)
String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Int i -> fst i.Region.value
| Ident id
| Constr id -> id.Region.value
(* Symbols *)
| SEMI _ -> ";"
| COMMA _ -> ","
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| CONS _ -> "#"
| VBAR _ -> "|"
| ARROW _ -> "->"
| ASS _ -> ":="
| EQUAL _ -> "="
| COLON _ -> ":"
| LT _ -> "<"
| LEQ _ -> "<="
| GT _ -> ">"
| GEQ _ -> ">="
| NEQ _ -> "=/="
| PLUS _ -> "+"
| MINUS _ -> "-"
| SLASH _ -> "/"
| TIMES _ -> "*"
| DOT _ -> "."
| WILD _ -> "_"
| CAT _ -> "^"
(* Keywords *)
| And _ -> "and"
| Begin _ -> "begin"
| Case _ -> "case"
| Const _ -> "const"
| Down _ -> "down"
| Fail _ -> "fail"
| If _ -> "if"
| In _ -> "in"
| Is _ -> "is"
| Entrypoint _ -> "entrypoint"
| For _ -> "for"
| Function _ -> "function"
| Type _ -> "type"
| Of _ -> "of"
| Or _ -> "or"
| Var _ -> "var"
| End _ -> "end"
| Then _ -> "then"
| Else _ -> "else"
| Map _ -> "map"
| Patch _ -> "patch"
| Procedure _ -> "procedure"
| Record _ -> "record"
| Skip _ -> "skip"
| Step _ -> "step"
| Storage _ -> "storage"
| To _ -> "to"
| Mod _ -> "mod"
| Not _ -> "not"
| While _ -> "while"
| With _ -> "with"
(* Data constructors *)
| C_False _ -> "False"
| C_None _ -> "None"
| C_Some _ -> "Some"
| C_True _ -> "True"
| C_Unit _ -> "Unit"
(* Virtual tokens *)
| EOF _ -> ""
let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str
let to_region token = proj_token token |> fst
(* LEXIS *)
let keywords = [
(fun reg -> And reg);
(fun reg -> Begin reg);
(fun reg -> Case reg);
(fun reg -> Const reg);
(fun reg -> Down reg);
(fun reg -> Fail reg);
(fun reg -> If reg);
(fun reg -> In reg);
(fun reg -> Is reg);
(fun reg -> Entrypoint reg);
(fun reg -> For reg);
(fun reg -> Function reg);
(fun reg -> Type reg);
(fun reg -> Of reg);
(fun reg -> Or reg);
(fun reg -> Var reg);
(fun reg -> End reg);
(fun reg -> Then reg);
(fun reg -> Else reg);
(fun reg -> Map reg);
(fun reg -> Patch reg);
(fun reg -> Procedure reg);
(fun reg -> Record reg);
(fun reg -> Skip reg);
(fun reg -> Step reg);
(fun reg -> Storage reg);
(fun reg -> To reg);
(fun reg -> Mod reg);
(fun reg -> Not reg);
(fun reg -> While reg);
(fun reg -> With reg)
]
let reserved =
let open SSet in
empty |> add "as"
|> add "asr"
|> add "assert"
|> add "class"
|> add "constraint"
|> add "do"
|> add "done"
|> add "downto"
|> add "exception"
|> add "external"
|> add "false"
|> add "fun"
|> add "functor"
|> add "include"
|> add "inherit"
|> add "initializer"
|> add "land"
|> add "lazy"
|> add "let"
|> add "lor"
|> add "lsl"
|> add "lsr"
|> add "lxor"
|> add "method"
|> add "module"
|> add "mutable"
|> add "new"
|> add "nonrec"
|> add "object"
|> add "open"
|> add "private"
|> add "rec"
|> add "sig"
|> add "struct"
|> add "true"
|> add "try"
|> add "val"
|> add "virtual"
|> add "when"
let constructors = [
(fun reg -> C_False reg);
(fun reg -> C_None reg);
(fun reg -> C_Some reg);
(fun reg -> C_True reg);
(fun reg -> C_Unit reg)
]
let add map (key, value) = SMap.add key value map
let mk_map mk_key list =
let apply map value = add map (mk_key value, value)
in List.fold_left apply SMap.empty list
type lexis = {
kwd : (Region.t -> token) SMap.t;
cstr : (Region.t -> token) SMap.t;
res : SSet.t
}
let lexicon : lexis =
let build list = mk_map (fun f -> to_lexeme (f Region.ghost)) list
in {kwd = build keywords;
cstr = build constructors;
res = reserved}
(* Identifiers *)
type ident_err = Reserved_name
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let digit = ['0'-'9']
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
(* Rules *)
rule scan_ident region lexicon = parse
(ident as value) eof {
if SSet.mem value lexicon.res
then Error Reserved_name
else Ok (match SMap.find_opt value lexicon.kwd with
Some mk_kwd -> mk_kwd region
| None -> Ident Region.{region; value}) }
and scan_constr region lexicon = parse
(constr as value) eof {
match SMap.find_opt value lexicon.cstr with
Some mk_cstr -> mk_cstr region
| None -> Constr Region.{region; value} }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Smart constructors (injections) *)
let mk_string lexeme region = String Region.{region; value=lexeme}
let mk_bytes lexeme region =
let norm = Str.(global_replace (regexp "_") "" lexeme) in
let value = lexeme, Hex.of_string norm
in Bytes Region.{region; value}
type int_err = Non_canonical_zero
let mk_int lexeme region =
let z = Str.(global_replace (regexp "_") "" lexeme)
|> Z.of_string in
if Z.equal z Z.zero && lexeme <> "0"
then Error Non_canonical_zero
else Ok (Int Region.{region; value = lexeme, z})
let eof region = EOF region
let mk_sym lexeme region =
match lexeme with
";" -> SEMI region
| "," -> COMMA region
| "(" -> LPAR region
| ")" -> RPAR region
| "{" -> LBRACE region
| "}" -> RBRACE region
| "[" -> LBRACKET region
| "]" -> RBRACKET region
| "#" -> CONS region
| "|" -> VBAR region
| "->" -> ARROW region
| ":=" -> ASS region
| "=" -> EQUAL region
| ":" -> COLON region
| "<" -> LT region
| "<=" -> LEQ region
| ">" -> GT region
| ">=" -> GEQ region
| "=/=" -> NEQ region
| "+" -> PLUS region
| "-" -> MINUS region
| "/" -> SLASH region
| "*" -> TIMES region
| "." -> DOT region
| "_" -> WILD region
| "^" -> CAT region
| _ -> assert false
(* Identifiers *)
let mk_ident' lexeme region lexicon =
Lexing.from_string lexeme |> scan_ident region lexicon
let mk_ident lexeme region = mk_ident' lexeme region lexicon
(* Constructors *)
let mk_constr' lexeme region lexicon =
Lexing.from_string lexeme |> scan_constr region lexicon
let mk_constr lexeme region = mk_constr' lexeme region lexicon
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
And _
| Begin _
| Case _
| Const _
| Down _
| Fail _
| If _
| In _
| Is _
| Entrypoint _
| For _
| Function _
| Type _
| Of _
| Or _
| Var _
| End _
| Then _
| Else _
| Map _
| Patch _
| Procedure _
| Record _
| Skip _
| Step _
| Storage _
| To _
| Mod _
| Not _
| While _
| With _ -> true
| _ -> false
let is_constr = function
Constr _
| C_False _
| C_None _
| C_Some _
| C_True _
| C_Unit _ -> true
| _ -> false
let is_sym = function
SEMI _
| COMMA _
| LPAR _
| RPAR _
| LBRACE _
| RBRACE _
| LBRACKET _
| RBRACKET _
| CONS _
| VBAR _
| ARROW _
| ASS _
| EQUAL _
| COLON _
| LT _
| LEQ _
| GT _
| GEQ _
| NEQ _
| PLUS _
| MINUS _
| SLASH _
| TIMES _
| DOT _
| WILD _
| CAT _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false
(* END TRAILER *)
}

View File

@ -1,153 +0,0 @@
(* Lexer specification for LIGO, to be processed by [ocamllex].
The underlying design principles are:
(1) enforce stylistic constraints at a lexical level, in order to
early reject potentially misleading or poorly written
LIGO contracts;
(2) provide precise error messages with hint as how to fix the
issue, which is achieved by consulting the lexical
right-context of lexemes;
(3) be as independent as possible from the LIGO version, so
upgrades have as little impact as possible on this
specification: this is achieved by using the most general
regular expressions to match the lexing buffer and broadly
distinguish the syntactic categories, and then delegating a
finer, protocol-dependent, second analysis to an external
module making the tokens (hence a functor below);
(4) support unit testing (lexing of the whole input with debug
traces);
The limitation to the protocol independence lies in the errors that
the external module building the tokens (which is
protocol-dependent) may have to report. Indeed these errors have to
be contextualised by the lexer in terms of input source regions, so
useful error messages can be printed, therefore they are part of
the signature [TOKEN] that parameterise the functor generated
here. For instance, if, in a future release of LIGO, new tokens may
be added, and the recognition of their lexemes may entail new
errors, the signature [TOKEN] will have to be augmented and the
lexer specification changed. However, it is more likely that
instructions or types are added, instead of new kinds of tokens.
*)
type lexeme = string
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. This enables to externalise
version-dependent constraints in any module whose signature matches
[TOKEN]. Generic functions to construct tokens are required.
Note the predicate [is_eof], which caracterises the virtual token
for end-of-file, because it requires special handling. Some of
those functions may yield errors, which are defined as values of
the type [int_err] etc. These errors can be better understood by
reading the ocamllex specification for the lexer ([Lexer.mll]).
*)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
(* The module type for lexers is [S]. It mainly exports the function
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
* together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (a.k.a tokeniser or
scanner) whose type is [Lexing.lexbuf -> token], and suitable for a
parser generated by Menhir.
The argument labelled [log] is a logger. It may print a token and
its left markup to a given channel, at the caller's discretion.
*)
module type S =
sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error : ?offsets:bool -> [`Byte | `Point] ->
Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : S with module Token = Token

View File

@ -1,873 +0,0 @@
(* Lexer specification for LIGO, to be processed by [ocamllex]. *)
{
(* START HEADER *)
type lexeme = string
(* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length
[len] containing the [len] characters in the list [p], in reverse
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
let mk_str (len: int) (p: char list) : string =
let bytes = Bytes.make len ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (len-1) p |> Bytes.to_string
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s)
(* LEXER ENGINE *)
(* Resetting file name and line number in the lexing buffer
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
*)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line ~line buffer =
assert (line >= 0);
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset_offset ~offset buffer =
assert (offset >= 0);
Printf.printf "[reset] offset=%i\n" offset;
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol (*+ offset*)}
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
(* Rolling back one lexeme _within the current semantic action_ *)
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
(* ALIASES *)
let sprintf = Printf.sprintf
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. Consequently, generic functions to
construct tokens are provided. Note predicate [is_eof], which
caracterises the virtual token for end-of-file, because it requires
special handling. *)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
(* The module type for lexers is [S]. *)
module type S = sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error :
?offsets:bool -> [`Byte | `Point] -> Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : (S with module Token = Token) =
struct
module Token = Token
type token = Token.token
type file_path = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
(* When scanning structured constructs, like strings and comments,
we need to keep the region of the opening symbol (like double
quote, "//" or "(*") in order to report any error more
precisely. Since ocamllex is byte-oriented, we need to store
the parsed bytes as characters in an accumulator [acc] and
also its length [len], so, we are done, it is easy to build the
string making up the structured construct with [mk_str] (see
above).
The resulting data structure is called a _thread_.
(Note for Emacs: "*)".)
*)
type thread = {
opening : Region.t;
len : int;
acc : char list
}
let push_char char {opening; len; acc} = {opening; len=len+1; acc=char::acc}
let push_string str {opening; len; acc} =
{opening;
len = len + String.length str;
acc = explode str acc}
(* STATE *)
(* Beyond tokens, the result of lexing is a state. The type
[state] represents the logical state of the lexing engine, that
is, a value which is threaded during scanning and which denotes
useful, high-level information beyond what the type
[Lexing.lexbuf] in the standard library already provides for
all generic lexers.
Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised
tokens, with the markup at the left of its lexeme until either
the start of the file or the end of the previously recognised
token.
The markup from the last recognised token or, if the first
token has not been recognised yet, from the beginning of the
file is stored in the field [markup] of the state because it is
a side-effect, with respect to the output token list, and we
use a record with a single field [units] because that record
may be easily extended during the future maintenance of this
lexer.
The state also includes a field [pos] which holds the current
position in the LIGO source file. The position is not always
updated after a single character has been matched: that depends
on the regular expression that matched the lexing buffer.
The fields [decoder] and [supply] offer the support needed
for the lexing of UTF-8 encoded characters in comments (the
only place where they are allowed in LIGO). The former is the
decoder proper and the latter is the effectful function
[supply] that takes a byte, a start index and a length and feed
it to [decoder]. See the documentation of the third-party
library Uutf.
*)
type state = {
units : (Markup.t list * token) FQueue.t;
markup : Markup.t list;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit
}
(* The call [enqueue (token, state)] updates functionally the
state [state] by associating the token [token] with the stored
markup and enqueuing the pair into the units queue. The field
[markup] is then reset to the empty list. *)
let enqueue (token, state) = {
state with
units = FQueue.enq (state.markup, token) state.units;
markup = []
}
(* The call [sync state buffer] updates the current position in
accordance with the contents of the lexing buffer, more
precisely, depending on the length of the string which has just
been recognised by the scanner: that length is used as a
positive offset to the current column. *)
let sync state buffer =
let lex = Lexing.lexeme buffer in
let len = String.length lex in
let start = state.pos in
let stop = start#shift_bytes len in
let state = {state with pos = stop}
in Region.make ~start ~stop, lex, state
(* MARKUP *)
(* Committing markup to the current logical state *)
let push_newline state buffer =
let value = Lexing.lexeme buffer
and () = Lexing.new_line buffer
and start = state.pos in
let stop = start#new_line value in
let state = {state with pos = stop}
and region = Region.make ~start ~stop in
let unit = Markup.Newline Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_line (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.LineCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_block (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.BlockCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_space state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Space Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_tabs state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Tabs Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_bom state buffer =
let region, value, state = sync state buffer in
let unit = Markup.BOM Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
(* ERRORS *)
type Error.t += Invalid_utf8_sequence
type Error.t += Unexpected_character of char
type Error.t += Undefined_escape_sequence
type Error.t += Missing_break
type Error.t += Unterminated_string
type Error.t += Unterminated_integer
type Error.t += Odd_lengthed_bytes
type Error.t += Unterminated_comment
type Error.t += Orphan_minus
type Error.t += Non_canonical_zero
type Error.t += Negative_byte_sequence
type Error.t += Broken_string
type Error.t += Invalid_character_in_string
type Error.t += Reserved_name
let error_to_string = function
Invalid_utf8_sequence ->
"Invalid UTF-8 sequence.\n"
| Unexpected_character c ->
sprintf "Unexpected character '%s'.\n" (Char.escaped c)
| Undefined_escape_sequence ->
"Undefined escape sequence.\n\
Hint: Remove or replace the sequence.\n"
| Missing_break ->
"Missing break.\n\
Hint: Insert some space.\n"
| Unterminated_string ->
"Unterminated string.\n\
Hint: Close with double quotes.\n"
| Unterminated_integer ->
"Unterminated integer.\n\
Hint: Remove the sign or proceed with a natural number.\n"
| Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit.\n"
| Unterminated_comment ->
"Unterminated comment.\n\
Hint: Close with \"*)\".\n"
| Orphan_minus ->
"Orphan minus sign.\n\
Hint: Remove the trailing space.\n"
| Non_canonical_zero ->
"Non-canonical zero.\n\
Hint: Use 0.\n"
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign.\n"
| Broken_string ->
"The string starting here is interrupted by a line break.\n\
Hint: Remove the break or close the string before.\n"
| Invalid_character_in_string ->
"Invalid character in string.\n\
Hint: Remove or replace the character.\n"
| Reserved_name ->
"Reserved named.\n\
Hint: Change the name.\n"
| _ -> assert false
exception Error of Error.t Region.reg
let fail region value = raise (Error Region.{region; value})
(* TOKENS *)
(* Making tokens *)
let mk_string (thread, state) =
let start = thread.opening#start in
let stop = state.pos in
let region = Region.make ~start ~stop in
let lexeme = mk_str thread.len thread.acc in
let token = Token.mk_string lexeme region
in token, state
let mk_bytes bytes state buffer =
let region, _, state = sync state buffer in
let token = Token.mk_bytes bytes region
in token, state
let mk_int state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_int lexeme region with
Ok token -> token, state
| Error Token.Non_canonical_zero ->
fail region Non_canonical_zero
let mk_ident state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_ident lexeme region with
Ok token -> token, state
| Error Token.Reserved_name -> fail region Reserved_name
let mk_constr state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_constr lexeme region, state
let mk_sym state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_sym lexeme region, state
let mk_eof state buffer =
let region, _, state = sync state buffer
in Token.eof region, state
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
let nl = ['\n' '\r'] | "\r\n"
let blank = ' ' | '\t'
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
let hexa_digit = digit | ['A'-'F']
let byte = hexa_digit hexa_digit
let byte_seq = byte | byte (byte | '_')* byte
let bytes = "0x" (byte_seq? as seq)
let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
| "\\r" | "\\t" | "\\x" byte
let symbol = ';' | ','
| '(' | ')' | '{' | '}' | '[' | ']'
| '#' | '|' | "->" | ":=" | '=' | ':'
| '<' | "<=" | '>' | ">=" | "=/="
| '+' | '-' | '*' | '.' | '_' | '^'
let string = [^'"' '\\' '\n']* (* For strings of #include *)
(* RULES *)
(* Except for the first rule [init], all rules bear a name starting
with "scan".
All have a parameter [state] that they thread through their
recursive calls. The rules for the structured constructs (strings
and comments) have an extra parameter of type [thread] (see above).
*)
rule init state = parse
utf8_bom { scan (push_bom state lexbuf) lexbuf }
| _ { rollback lexbuf; scan state lexbuf }
and scan state = parse
nl { scan (push_newline state lexbuf) lexbuf }
| ' '+ { scan (push_space state lexbuf) lexbuf }
| '\t'+ { scan (push_tabs state lexbuf) lexbuf }
| ident { mk_ident state lexbuf |> enqueue }
| constr { mk_constr state lexbuf |> enqueue }
| bytes { (mk_bytes seq) state lexbuf |> enqueue }
| integer { mk_int state lexbuf |> enqueue }
| symbol { mk_sym state lexbuf |> enqueue }
| eof { mk_eof state lexbuf |> enqueue }
| '"' { let opening, _, state = sync state lexbuf in
let thread = {opening; len=1; acc=['"']} in
scan_string thread state lexbuf |> mk_string |> enqueue }
| "(*" { let opening, _, state = sync state lexbuf in
let thread = {opening; len=2; acc=['*';'(']} in
let state = scan_block thread state lexbuf |> push_block
in scan state lexbuf }
| "//" { let opening, _, state = sync state lexbuf in
let thread = {opening; len=2; acc=['/';'/']} in
let state = scan_line thread state lexbuf |> push_line
in scan state lexbuf }
(* Management of #include CPP directives
An input LIGO program may contain GNU CPP (C preprocessor)
directives, and the entry modules (named *Main.ml) run CPP on them
in traditional mode:
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
The main interest in using CPP is that it can stand for a poor
man's (flat) module system for LIGO thanks to #include
directives, and the traditional mode leaves the markup mostly
undisturbed.
Some of the #line resulting from processing #include directives
deal with system file headers and thus have to be ignored for our
purpose. Moreover, these #line directives may also carry some
additional flags:
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
of which 1 and 2 indicate, respectively, the start of a new file
and the return from a file (after its inclusion has been
processed).
*)
| '#' blank* ("line" blank+)? (integer as line) blank+
'"' (string as file) '"' {
let _, _, state = sync state lexbuf in
let flags, state = scan_flags state [] lexbuf in
let () = ignore flags in
let line = int_of_string line
and file = Filename.basename file in
let pos = state.pos#set ~file ~line ~offset:0 in
let state = {state with pos} in
scan state lexbuf
}
(* Some special errors
Some special errors are recognised in the semantic actions of the
following regular expressions. The first error is a minus sign
separated from the integer it modifies by some markup (space or
tabs). The second is a minus sign immediately followed by
anything else than a natural number (matched above) or markup and
a number (previous error). The third is the strange occurrence of
an attempt at defining a negative byte sequence. Finally, the
catch-all rule reports unexpected characters in the buffer (and
is not so special, after all).
*)
| '-' { let region, _, state = sync state lexbuf in
let state = scan state lexbuf in
let open Markup in
match FQueue.peek state.units with
None -> assert false
| Some (_, ((Space _ | Tabs _)::_, token))
when Token.is_int token ->
fail region Orphan_minus
| _ -> fail region Unterminated_integer }
| '-' "0x" byte_seq?
{ let region, _, _ = sync state lexbuf
in fail region Negative_byte_sequence }
| _ as c { let region, _, _ = sync state lexbuf
in fail region (Unexpected_character c) }
(* Scanning CPP #include flags *)
and scan_flags state acc = parse
blank+ { let _, _, state = sync state lexbuf
in scan_flags state acc lexbuf }
| integer as code { let _, _, state = sync state lexbuf in
let acc = int_of_string code :: acc
in scan_flags state acc lexbuf }
| nl { List.rev acc, push_newline state lexbuf }
| eof { let _, _, state = sync state lexbuf
in List.rev acc, state (* TODO *) }
(* Finishing a string *)
and scan_string thread state = parse
nl { fail thread.opening Broken_string }
| eof { fail thread.opening Unterminated_string }
| ['\t' '\r' '\b']
{ let region, _, _ = sync state lexbuf
in fail region Invalid_character_in_string }
| '"' { let _, _, state = sync state lexbuf
in push_char '"' thread, state }
| esc { let _, lexeme, state = sync state lexbuf
in scan_string (push_string lexeme thread) state lexbuf }
| '\\' _ { let region, _, _ = sync state lexbuf
in fail region Undefined_escape_sequence }
| _ as c { let _, _, state = sync state lexbuf in
scan_string (push_char c thread) state lexbuf }
(* Finishing a block comment
(Note for Emacs: ("(*")
The lexing of block comments must take care of embedded block
comments that may occur within, as well as strings, so no substring
"*)" may inadvertently close the block. This is the purpose
of the first case of the scanner [scan_block].
*)
and scan_block thread state = parse
'"' | "(*" { let opening = thread.opening in
let opening', lexeme, state = sync state lexbuf in
let thread = push_string lexeme thread in
let thread = {thread with opening=opening'} in
let next = if lexeme = "\"" then scan_string
else scan_block in
let thread, state = next thread state lexbuf in
let thread = {thread with opening}
in scan_block thread state lexbuf }
| "*)" { let _, lexeme, state = sync state lexbuf
in push_string lexeme thread, state }
| nl as nl { let () = Lexing.new_line lexbuf
and state = {state with pos = state.pos#new_line nl}
and thread = push_string nl thread
in scan_block thread state lexbuf }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_block thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
(* Finishing a line comment *)
and scan_line thread state = parse
nl as nl { let () = Lexing.new_line lexbuf
and thread = push_string nl thread
and state = {state with pos = state.pos#new_line nl}
in thread, state }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_line thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
and scan_utf8 thread state = parse
eof { fail thread.opening Unterminated_comment }
| _ as c { let thread = push_char c thread in
let lexeme = Lexing.lexeme lexbuf in
let () = state.supply (Bytes.of_string lexeme) 0 1 in
match Uutf.decode state.decoder with
`Uchar _ -> thread, None
| `Malformed _ -> thread, Some Invalid_utf8_sequence
| `Await -> scan_utf8 thread state lexbuf
| `End -> assert false }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
Because we want the lexer to have access to the right lexical
context of a recognised lexeme (to enforce stylistic constraints or
report special error patterns), we need to keep a hidden reference
to a queue of recognised lexical units (that is, tokens and markup)
that acts as a mutable state between the calls to
[read_token]. When [read_token] is called, that queue is consulted
first and, if it contains at least one token, that token is
returned; otherwise, the lexing buffer is scanned for at least one
more new token. That is the general principle: we put a high-level
buffer (our queue) on top of the low-level lexing buffer.
One tricky and important detail is that we must make any parser
generated by Menhir (and calling [read_token]) believe that the
last region of the input source that was matched indeed corresponds
to the returned token, despite that many tokens and markup may have
been matched since it was actually read from the input. In other
words, the parser requests a token that is taken from the
high-level buffer, but the parser requests the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
Consequently, in order to maintain a consistent view for the
parser, we have to patch some fields of the lexing buffer, namely
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
generated by Menhir when querying source positions (regions). This
is the purpose of the function [patch_buffer]. After reading one
ore more tokens and markup by the scanning rule [scan], we have to
save in the hidden reference [buf_reg] the region of the source
that was matched by [scan]. This atomic sequence of patching,
scanning and saving is implemented by the _function_ [scan]
(beware: it shadows the scanning rule [scan]). The function
[patch_buffer] is, of course, also called just before returning the
token, so the parser has a view of the lexing buffer consistent
with the token.
Note that an additional reference [first_call] is needed to
distinguish the first call to the function [scan], as the first
scanning rule is actually [init] (which can handle the BOM), not
[scan].
*)
type logger = Markup.t list -> token -> unit
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
let file_path = match EvalOpt.input with
None | Some "-" -> ""
| Some file_path -> file_path
let pos = Pos.min#set_file file_path
let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual
let supply = Uutf.Manual.src decoder
let state = ref {units = FQueue.empty;
last = Region.ghost;
pos;
markup = [];
decoder;
supply}
let get_pos () = !state.pos
let get_last () = !state.last
let patch_buffer (start, stop) buffer =
let open Lexing in
let file_path = buffer.lex_curr_p.pos_fname in
buffer.lex_start_p <- {start with pos_fname = file_path};
buffer.lex_curr_p <- {stop with pos_fname = file_path}
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p)
let scan buffer =
patch_buffer !buf_reg buffer;
(if !first_call
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
save_region buffer
let next_token buffer =
scan buffer;
match FQueue.peek !state.units with
None -> assert false
| Some (units, ext_token) ->
state := {!state with units}; Some ext_token
let check_right_context token buffer =
let open Token in
if is_int token || is_bytes token then
match next_token buffer with
Some ([], next) ->
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
if is_bytes token && is_int next then
fail region Odd_lengthed_bytes
else
if is_ident next || is_string next
|| is_bytes next || is_int next then
fail region Missing_break
| _ -> ()
else
if Token.is_ident token || Token.is_string token then
match next_token buffer with
Some ([], next) ->
if Token.is_ident next || Token.is_string next
|| Token.is_bytes next || Token.is_int next
then
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos
in fail region Missing_break
| _ -> ()
let rec read_token ?(log=fun _ _ -> ()) buffer =
match FQueue.deq !state.units with
None ->
scan buffer;
read_token ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := {!state with units; last = Token.to_region token};
check_right_context token buffer;
patch_buffer (Token.to_region token)#byte_pos buffer;
token
let open_token_stream file_path_opt =
let cin = match file_path_opt with
None | Some "-" -> stdin
| Some file_path -> open_in file_path in
let buffer = Lexing.from_channel cin in
let () = match file_path_opt with
None | Some "-" -> ()
| Some file_path -> reset ~file:file_path buffer
and close () = close_in cin in
{read = read_token; buffer; get_pos; get_last; close}
(* Standalone lexer for debugging purposes *)
(* Pretty-printing in a string the lexemes making up the markup
between two tokens, concatenated with the last lexeme itself. *)
let output_token ?(offsets=true) mode command
channel left_mark token : unit =
let output str = Printf.fprintf channel "%s%!" str in
let output_nl str = output (str ^ "\n") in
match command with
EvalOpt.Quiet -> ()
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl
| EvalOpt.Copy ->
let lexeme = Token.to_lexeme token
and apply acc markup = Markup.to_lexeme markup :: acc
in List.fold_left apply [lexeme] left_mark
|> String.concat "" |> output
| EvalOpt.Units ->
let abs_token = Token.to_string token ~offsets mode
and apply acc markup =
Markup.to_string markup ~offsets mode :: acc
in List.fold_left apply [abs_token] left_mark
|> String.concat "\n" |> output_nl
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg)
let trace ?(offsets=true) mode file_path_opt command : unit =
try
let {read; buffer; close; _} = open_token_stream file_path_opt
and cout = stdout in
let log = output_token ~offsets mode command cout
and close_all () = close (); close_out cout in
let rec iter () =
match read ~log buffer with
token ->
if Token.is_eof token then close_all ()
else iter ()
| exception Error e -> print_error ~offsets mode e; close_all ()
in iter ()
with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg)
end (* of functor [Make] in HEADER *)
(* END TRAILER *)
}

View File

@ -1,55 +0,0 @@
(* Driver for the lexer of LIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Path for CPP inclusions (#include) *)
let lib_path =
match EvalOpt.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - -o %s"
lib_path pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp%s %s -o %s"
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Running the lexer on the input file *)
module Lexer = Lexer.Make (LexToken)
let () = Lexer.trace ~offsets:EvalOpt.offsets
EvalOpt.mode (Some pp_input) EvalOpt.cmd

View File

@ -1,6 +0,0 @@
(* TEMPORARY: SHOULD BE ERASED *)
type t = Hex.t
let of_hex x = x
let to_hex x = x

View File

@ -1,6 +0,0 @@
(* TEMPORARY: SHOULD BE ERASED *)
type t
val of_hex : Hex.t -> t
val to_hex : t -> Hex.t

View File

@ -1,42 +0,0 @@
type lexeme = string
type t =
Tabs of int Region.reg
| Space of int Region.reg
| Newline of lexeme Region.reg
| LineCom of lexeme Region.reg
| BlockCom of lexeme Region.reg
| BOM of lexeme Region.reg
type markup = t
(* Pretty-printing *)
let sprintf = Printf.sprintf
let to_lexeme = function
Tabs Region.{value;_} -> String.make value '\t'
| Space Region.{value;_} -> String.make value ' '
| Newline Region.{value;_}
| LineCom Region.{value;_}
| BlockCom Region.{value;_}
| BOM Region.{value;_} -> value
let to_string markup ?(offsets=true) mode =
let region, val_str =
match markup with
Tabs Region.{value; region} ->
let lex = String.make value '\t' |> String.escaped
in region, sprintf "Tabs \"%s\"" lex
| Space Region.{value; region} ->
region, sprintf "Space \"%s\"" (String.make value ' ')
| Newline Region.{value; region} ->
region, sprintf "Newline \"%s\"" (String.escaped value)
| LineCom Region.{value; region} ->
region, sprintf "LineCom \"%s\"" (String.escaped value)
| BlockCom Region.{value; region} ->
region, sprintf "BlockCom \"%s\"" (String.escaped value)
| BOM Region.{value; region} ->
region, sprintf "BOM \"%s\"" (String.escaped value) in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str

View File

@ -1,32 +0,0 @@
(* This module defines the sorts of markup recognised by the LIGO
lexer *)
(* A lexeme is piece of concrete syntax belonging to a token. In
algebraic terms, a token is also a piece of abstract lexical
syntax. Lexical units emcompass both markup and lexemes. *)
type lexeme = string
type t =
Tabs of int Region.reg (* Tabulations *)
| Space of int Region.reg (* Space *)
| Newline of lexeme Region.reg (* "\n" or "\c\r" escape characters *)
| LineCom of lexeme Region.reg (* Line comments *)
| BlockCom of lexeme Region.reg (* Block comments *)
| BOM of lexeme Region.reg (* Byte-Order Mark for UTF-8 (optional) *)
type markup = t
(* Pretty-printing of markup
The difference between [to_lexeme] and [to_string] is that the
former builds the corresponding concrete syntax (the lexeme),
whilst the latter makes up a textual representation of the abstract
syntax (the OCaml data constructors).
The result of [to_string] is escaped to avoid capture by the
terminal.
*)
val to_lexeme : t -> lexeme
val to_string : t -> ?offsets:bool -> [`Byte | `Point] -> string

View File

@ -1,89 +0,0 @@
%{
%}
(* Tokens (mirroring thise defined in module LexToken) *)
(* Literals *)
%token <LexToken.lexeme Region.reg> String
%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes
%token <(LexToken.lexeme * Z.t) Region.reg> Int
%token <LexToken.lexeme Region.reg> Ident
%token <LexToken.lexeme Region.reg> Constr
(* Symbols *)
%token <Region.t> SEMI (* ";" *)
%token <Region.t> COMMA (* "," *)
%token <Region.t> LPAR (* "(" *)
%token <Region.t> RPAR (* ")" *)
%token <Region.t> LBRACE (* "{" *)
%token <Region.t> RBRACE (* "}" *)
%token <Region.t> LBRACKET (* "[" *)
%token <Region.t> RBRACKET (* "]" *)
%token <Region.t> CONS (* "#" *)
%token <Region.t> VBAR (* "|" *)
%token <Region.t> ARROW (* "->" *)
%token <Region.t> ASS (* ":=" *)
%token <Region.t> EQUAL (* "=" *)
%token <Region.t> COLON (* ":" *)
%token <Region.t> LT (* "<" *)
%token <Region.t> LEQ (* "<=" *)
%token <Region.t> GT (* ">" *)
%token <Region.t> GEQ (* ">=" *)
%token <Region.t> NEQ (* "=/=" *)
%token <Region.t> PLUS (* "+" *)
%token <Region.t> MINUS (* "-" *)
%token <Region.t> SLASH (* "/" *)
%token <Region.t> TIMES (* "*" *)
%token <Region.t> DOT (* "." *)
%token <Region.t> WILD (* "_" *)
%token <Region.t> CAT (* "^" *)
(* Keywords *)
%token <Region.t> And (* "and" *)
%token <Region.t> Begin (* "begin" *)
%token <Region.t> Case (* "case" *)
%token <Region.t> Const (* "const" *)
%token <Region.t> Down (* "down" *)
%token <Region.t> Fail (* "fail" *)
%token <Region.t> If (* "if" *)
%token <Region.t> In (* "in" *)
%token <Region.t> Is (* "is" *)
%token <Region.t> Entrypoint (* "entrypoint" *)
%token <Region.t> For (* "for" *)
%token <Region.t> Function (* "function" *)
%token <Region.t> Type (* "type" *)
%token <Region.t> Of (* "of" *)
%token <Region.t> Or (* "or" *)
%token <Region.t> Var (* "var" *)
%token <Region.t> End (* "end" *)
%token <Region.t> Then (* "then" *)
%token <Region.t> Else (* "else" *)
%token <Region.t> Map (* "map" *)
%token <Region.t> Patch (* "patch" *)
%token <Region.t> Procedure (* "procedure" *)
%token <Region.t> Record (* "record" *)
%token <Region.t> Skip (* "skip" *)
%token <Region.t> Step (* "step" *)
%token <Region.t> Storage (* "storage" *)
%token <Region.t> To (* "to" *)
%token <Region.t> Mod (* "mod" *)
%token <Region.t> Not (* "not" *)
%token <Region.t> While (* "while" *)
%token <Region.t> With (* "with" *)
(* Data constructors *)
%token <Region.t> C_False (* "False" *)
%token <Region.t> C_None (* "None" *)
%token <Region.t> C_Some (* "Some" *)
%token <Region.t> C_True (* "True" *)
%token <Region.t> C_Unit (* "Unit" *)
(* Virtual tokens *)
%token <Region.t> EOF
%%

View File

@ -1,920 +0,0 @@
%{
(* START HEADER *)
[@@@warning "-42"]
open Region
open AST
(* END HEADER *)
%}
(* See [ParToken.mly] for the definition of tokens. *)
(* Entry points *)
%start contract interactive_expr
%type <AST.t> contract
%type <AST.expr> interactive_expr
%%
(* RULES *)
(* The rule [series(Item)] parses a list of [Item] separated by
semi-colons and optionally terminated by a semi-colon, then the
keyword [End]. *)
series(Item):
Item after_item(Item) { $1,$2 }
after_item(Item):
SEMI item_or_end(Item) {
match $2 with
`Some (item, items, term, close) ->
($1, item)::items, term, close
| `End close ->
[], Some $1, close
}
| End {
[], None, $1
}
item_or_end(Item):
End {
`End $1
}
| series(Item) {
let item, (items, term, close) = $1
in `Some (item, items, term, close)
}
(* Compound constructs *)
par(X):
LPAR X RPAR {
let region = cover $1 $3
and value = {
lpar = $1;
inside = $2;
rpar = $3}
in {region; value}
}
braces(X):
LBRACE X RBRACE {
let region = cover $1 $3
and value = {
lbrace = $1;
inside = $2;
rbrace = $3}
in {region; value}
}
brackets(X):
LBRACKET X RBRACKET {
let region = cover $1 $3
and value = {
lbracket = $1;
inside = $2;
rbracket = $3}
in {region; value}
}
(* Sequences
Series of instances of the same syntactical category have often to
be parsed, like lists of expressions, patterns etc. The simplest of
all is the possibly empty sequence (series), parsed below by
[seq]. The non-empty sequence is parsed by [nseq]. Note that the
latter returns a pair made of the first parsed item (the parameter
[X]) and the rest of the sequence (possibly empty). This way, the
OCaml typechecker can keep track of this information along the
static control-flow graph. The rule [sepseq] parses possibly empty
sequences of items separated by some token (e.g., a comma), and
rule [nsepseq] is for non-empty such sequences. See module [Utils]
for the types corresponding to the semantic actions of those rules.
*)
(* Possibly empty sequence of items *)
seq(X):
(**) { [] }
| X seq(X) { $1::$2 }
(* Non-empty sequence of items *)
nseq(X):
X seq(X) { $1,$2 }
(* Non-empty separated sequence of items *)
nsepseq(X,Sep):
X { $1, [] }
| X Sep nsepseq(X,Sep) { let h,t = $3 in $1, ($2,h)::t }
(* Possibly empy separated sequence of items *)
sepseq(X,Sep):
(**) { None }
| nsepseq(X,Sep) { Some $1 }
(* Inlines *)
%inline var : Ident { $1 }
%inline type_name : Ident { $1 }
%inline fun_name : Ident { $1 }
%inline field_name : Ident { $1 }
%inline record_name : Ident { $1 }
(* Main *)
contract:
nseq(declaration) EOF {
{decl = $1; eof = $2}
}
declaration:
type_decl { TypeDecl $1 }
| const_decl { ConstDecl $1 }
| lambda_decl { LambdaDecl $1 }
(* Type declarations *)
type_decl:
Type type_name Is type_expr option(SEMI) {
let stop =
match $5 with
Some region -> region
| None -> type_expr_to_region $4 in
let region = cover $1 stop in
let value = {
kwd_type = $1;
name = $2;
kwd_is = $3;
type_expr = $4;
terminator = $5}
in {region; value}}
type_expr:
cartesian { TProd $1 }
| sum_type { TSum $1 }
| record_type { TRecord $1 }
cartesian:
nsepseq(core_type,TIMES) {
let region = nsepseq_to_region type_expr_to_region $1
in {region; value=$1}
}
core_type:
type_name {
TAlias $1
}
| type_name type_tuple {
let region = cover $1.region $2.region
in TApp {region; value = $1,$2}
}
| Map type_tuple {
let region = cover $1 $2.region in
let value = {value="map"; region=$1}
in TApp {region; value = value, $2}
}
| par(type_expr) {
TPar $1
}
type_tuple:
par(nsepseq(type_expr,COMMA)) { $1 }
sum_type:
nsepseq(variant,VBAR) {
let region = nsepseq_to_region (fun x -> x.region) $1
in {region; value = $1}
}
variant:
Constr Of cartesian {
let region = cover $1.region $3.region
and value = {constr = $1; kwd_of = $2; product = $3}
in {region; value}
}
record_type:
Record
nsepseq(field_decl,SEMI)
End
{
let region = cover $1 $3
and value = {kwd_record = $1; fields = $2; kwd_end = $3}
in {region; value}
}
field_decl:
field_name COLON type_expr {
let stop = type_expr_to_region $3 in
let region = cover $1.region stop
and value = {field_name = $1; colon = $2; field_type = $3}
in {region; value}
}
(* Function and procedure declarations *)
lambda_decl:
fun_decl { FunDecl $1 }
| proc_decl { ProcDecl $1 }
| entry_decl { EntryDecl $1 }
fun_decl:
Function fun_name parameters COLON type_expr Is
seq(local_decl)
block
With expr option(SEMI) {
let stop =
match $11 with
Some region -> region
| None -> expr_to_region $10 in
let region = cover $1 stop in
let value = {
kwd_function = $1;
name = $2;
param = $3;
colon = $4;
ret_type = $5;
kwd_is = $6;
local_decls = $7;
block = $8;
kwd_with = $9;
return = $10;
terminator = $11}
in {region; value}
}
entry_decl:
Entrypoint fun_name entry_params COLON type_expr Is
seq(local_decl)
block
With expr option(SEMI) {
let stop =
match $11 with
Some region -> region
| None -> expr_to_region $10 in
let region = cover $1 stop in
let value = {
kwd_entrypoint = $1;
name = $2;
param = $3;
colon = $4;
ret_type = $5;
kwd_is = $6;
local_decls = $7;
block = $8;
kwd_with = $9;
return = $10;
terminator = $11}
in {region; value}
}
entry_params:
par(nsepseq(entry_param_decl,SEMI)) { $1 }
proc_decl:
Procedure fun_name parameters Is
seq(local_decl)
block option(SEMI)
{
let stop =
match $7 with
Some region -> region
| None -> $6.region in
let region = cover $1 stop in
let value = {
kwd_procedure = $1;
name = $2;
param = $3;
kwd_is = $4;
local_decls = $5;
block = $6;
terminator = $7}
in {region; value}
}
parameters:
par(nsepseq(param_decl,SEMI)) { $1 }
param_decl:
Var var COLON type_expr {
let stop = type_expr_to_region $4 in
let region = cover $1 stop
and value = {
kwd_var = $1;
var = $2;
colon = $3;
param_type = $4}
in ParamVar {region; value}
}
| Const var COLON type_expr {
let stop = type_expr_to_region $4 in
let region = cover $1 stop
and value = {
kwd_const = $1;
var = $2;
colon = $3;
param_type = $4}
in ParamConst {region; value}
}
entry_param_decl:
param_decl {
match $1 with
ParamConst const -> EntryConst const
| ParamVar var -> EntryVar var
}
| Storage var COLON type_expr {
let stop = type_expr_to_region $4 in
let region = cover $1 stop
and value = {
kwd_storage = $1;
var = $2;
colon = $3;
storage_type = $4}
in EntryStore {region; value}
}
block:
Begin series(instruction) {
let first, (others, terminator, close) = $2 in
let region = cover $1 close
and value = {
opening = $1;
instr = first, others;
terminator;
close}
in {region; value}
}
local_decl:
lambda_decl { LocalLam $1 }
| const_decl { LocalConst $1 }
| var_decl { LocalVar $1 }
unqualified_decl(OP):
var COLON type_expr OP extended_expr option(SEMI) {
let stop = match $6 with
Some region -> region
| None -> $5.region in
let init =
match $5.value with
`Expr e -> e
| `EList (lbracket, rbracket) ->
let region = $5.region
and value = {
lbracket;
rbracket;
colon = Region.ghost;
list_type = $3} in
let value = {
lpar = Region.ghost;
inside = value;
rpar = Region.ghost} in
EList (EmptyList {region; value})
| `ENone region ->
let value = {
lpar = Region.ghost;
inside = {
c_None = region;
colon = Region.ghost;
opt_type = $3};
rpar = Region.ghost}
in EConstr (NoneExpr {region; value})
| `EMap inj ->
EMap (MapInj inj)
in $1, $2, $3, $4, init, $6, stop
}
const_decl:
Const unqualified_decl(EQUAL) {
let name, colon, const_type, equal,
init, terminator, stop = $2 in
let region = cover $1 stop in
let value = {
kwd_const = $1;
name;
colon;
const_type;
equal;
init;
terminator}
in {region; value}
}
var_decl:
Var unqualified_decl(ASS) {
let name, colon, var_type, assign,
init, terminator, stop = $2 in
let region = cover $1 stop in
let value = {
kwd_var = $1;
name;
colon;
var_type;
assign;
init;
terminator}
in {region; value}
}
extended_expr:
expr { {region = expr_to_region $1;
value = `Expr $1} }
| LBRACKET RBRACKET { {region = cover $1 $2;
value = `EList ($1,$2)} }
| C_None { {region = $1; value = `ENone $1} }
| map_injection { {region = $1.region; value = `EMap $1} }
instruction:
single_instr { Single $1 }
| block { Block $1 }
single_instr:
conditional { Cond $1 }
| case_instr { Case $1 }
| assignment { Assign $1 }
| loop { Loop $1 }
| proc_call { ProcCall $1 }
| fail_instr { Fail $1 }
| Skip { Skip $1 }
| record_patch { RecordPatch $1 }
| map_patch { MapPatch $1 }
map_patch:
Patch path With map_injection {
let region = cover $1 $4.region in
let value = {
kwd_patch = $1;
path = $2;
kwd_with = $3;
map_inj = $4}
in {region; value}
}
map_injection:
Map series(binding) {
let first, (others, terminator, close) = $2 in
let region = cover $1 close
and value = {
opening = $1;
bindings = first, others;
terminator;
close}
in {region; value}
}
binding:
expr ARROW expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {
source = $1;
arrow = $2;
image = $3}
in {region; value}
}
record_patch:
Patch path With record_injection {
let region = cover $1 $4.region in
let value = {
kwd_patch = $1;
path = $2;
kwd_with = $3;
record_inj = $4}
in {region; value}
}
fail_instr:
Fail expr {
let region = cover $1 (expr_to_region $2)
and value = {kwd_fail = $1; fail_expr = $2}
in {region; value}}
proc_call:
fun_call { $1 }
conditional:
If expr Then instruction Else instruction {
let region = cover $1 (instr_to_region $6) in
let value = {
kwd_if = $1;
test = $2;
kwd_then = $3;
ifso = $4;
kwd_else = $5;
ifnot = $6}
in {region; value}
}
case_instr:
Case expr Of option(VBAR) cases End {
let region = cover $1 $6 in
let value = {
kwd_case = $1;
expr = $2;
kwd_of = $3;
lead_vbar = $4;
cases = $5;
kwd_end = $6}
in {region; value}
}
cases:
nsepseq(case,VBAR) {
let region = nsepseq_to_region (fun x -> x.region) $1
in {region; value = $1}
}
case:
pattern ARROW instruction {
let region = cover (pattern_to_region $1) (instr_to_region $3)
and value = {pattern = $1; arrow = $2; instr = $3}
in {region; value}
}
assignment:
lhs ASS rhs {
let stop = rhs_to_region $3 in
let region = cover (lhs_to_region $1) stop
and value = {lhs = $1; assign = $2; rhs = $3}
in {region; value}
}
rhs:
expr { Expr $1 }
| C_None { NoneExpr $1 : rhs }
lhs:
path { Path $1 }
| map_lookup { MapPath $1 }
loop:
while_loop { $1 }
| for_loop { $1 }
while_loop:
While expr block {
let region = cover $1 $3.region
and value = {
kwd_while = $1;
cond = $2;
block = $3}
in While {region; value}
}
for_loop:
For var_assign Down? To expr option(step_clause) block {
let region = cover $1 $7.region in
let value = {
kwd_for = $1;
assign = $2;
down = $3;
kwd_to = $4;
bound = $5;
step = $6;
block = $7}
in For (ForInt {region; value})
}
| For var option(arrow_clause) In expr block {
let region = cover $1 $6.region in
let value = {
kwd_for = $1;
var = $2;
bind_to = $3;
kwd_in = $4;
expr = $5;
block = $6}
in For (ForCollect {region; value})
}
var_assign:
var ASS expr {
let region = cover $1.region (expr_to_region $3)
and value = {name = $1; assign = $2; expr = $3}
in {region; value}
}
step_clause:
Step expr { $1,$2 }
arrow_clause:
ARROW var { $1,$2 }
(* Expressions *)
interactive_expr:
expr EOF { $1 }
expr:
expr Or conj_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3} in
ELogic (BoolExpr (Or {region; value}))
}
| conj_expr { $1 }
conj_expr:
conj_expr And comp_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in ELogic (BoolExpr (And {region; value}))
}
| comp_expr { $1 }
comp_expr:
comp_expr LT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in ELogic (CompExpr (Lt {region; value}))
}
| comp_expr LEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in ELogic (CompExpr (Leq {region; value}))
}
| comp_expr GT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in ELogic (CompExpr (Gt {region; value}))
}
| comp_expr GEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in ELogic (CompExpr (Geq {region; value}))
}
| comp_expr EQUAL cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in ELogic (CompExpr (Equal {region; value}))
}
| comp_expr NEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in ELogic (CompExpr (Neq {region; value}))
}
| cat_expr { $1 }
cat_expr:
cons_expr CAT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in EString (Cat {region; value})
}
| cons_expr { $1 }
cons_expr:
add_expr CONS cons_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in EList (Cons {region; value})
}
| add_expr { $1 }
add_expr:
add_expr PLUS mult_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in EArith (Add {region; value})
}
| add_expr MINUS mult_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in EArith (Sub {region; value})
}
| mult_expr { $1 }
mult_expr:
mult_expr TIMES unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in EArith (Mult {region; value})
}
| mult_expr SLASH unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in EArith (Div {region; value})
}
| mult_expr Mod unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop
and value = {arg1 = $1; op = $2; arg2 = $3}
in EArith (Mod {region; value})
}
| unary_expr { $1 }
unary_expr:
MINUS core_expr {
let stop = expr_to_region $2 in
let region = cover $1 stop
and value = {op = $1; arg = $2}
in EArith (Neg {region; value})
}
| Not core_expr {
let stop = expr_to_region $2 in
let region = cover $1 stop
and value = {op = $1; arg = $2} in
ELogic (BoolExpr (Not {region; value}))
}
| core_expr { $1 }
core_expr:
Int { EArith (Int $1) }
| var { EVar $1 }
| String { EString (String $1) }
| Bytes { EBytes $1 }
| C_False { ELogic (BoolExpr (False $1)) }
| C_True { ELogic (BoolExpr (True $1)) }
| C_Unit { EUnit $1 }
| tuple { ETuple $1 }
| list_expr { EList (List $1) }
| empty_list { EList (EmptyList $1) }
| set_expr { ESet (Set $1) }
| empty_set { ESet (EmptySet $1) }
| none_expr { EConstr (NoneExpr $1) }
| fun_call { ECall $1 }
| map_expr { EMap $1 }
| record_expr { ERecord $1 }
| Constr arguments {
let region = cover $1.region $2.region in
EConstr (ConstrApp {region; value = $1,$2})
}
| C_Some arguments {
let region = cover $1 $2.region in
EConstr (SomeApp {region; value = $1,$2})
}
map_expr:
map_lookup { MapLookUp $1 }
path:
var { Name $1 }
| record_projection { RecordPath $1 }
map_lookup:
path brackets(expr) {
let region = cover (path_to_region $1) $2.region in
let value = {
path = $1;
index = $2}
in {region; value}
}
record_expr:
record_injection { RecordInj $1 }
| record_projection { RecordProj $1 }
record_injection:
Record series(field_assignment) {
let first, (others, terminator, close) = $2 in
let region = cover $1 close
and value = {
opening = $1;
fields = first, others;
terminator;
close}
in {region; value}
}
field_assignment:
field_name EQUAL expr {
let region = cover $1.region (expr_to_region $3)
and value = {
field_name = $1;
equal = $2;
field_expr = $3}
in {region; value}
}
record_projection:
record_name DOT nsepseq(field_name,DOT) {
let stop = nsepseq_to_region (fun x -> x.region) $3 in
let region = cover $1.region stop
and value = {
record_name = $1;
selector = $2;
field_path = $3}
in {region; value}
}
fun_call:
fun_name arguments {
let region = cover $1.region $2.region
in {region; value = $1,$2}
}
tuple:
par(nsepseq(expr,COMMA)) { $1 }
arguments:
tuple { $1 }
list_expr:
brackets(nsepseq(expr,COMMA)) { $1 }
empty_list:
par(typed_empty_list) { $1 }
typed_empty_list:
LBRACKET RBRACKET COLON type_expr {
{lbracket = $1;
rbracket = $2;
colon = $3;
list_type = $4}
}
set_expr:
braces(nsepseq(expr,COMMA)) { $1 }
empty_set:
par(typed_empty_set) { $1 }
typed_empty_set:
LBRACE RBRACE COLON type_expr {
{lbrace = $1;
rbrace = $2;
colon = $3;
set_type = $4}
}
none_expr:
par(typed_none_expr) { $1 }
typed_none_expr:
C_None COLON type_expr {
{c_None = $1;
colon = $2;
opt_type = $3}
}
(* Patterns *)
pattern:
nsepseq(core_pattern,CONS) {
let region = nsepseq_to_region pattern_to_region $1
in PCons {region; value=$1}
}
core_pattern:
var { PVar $1 }
| WILD { PWild $1 }
| Int { PInt $1 }
| String { PString $1 }
| C_Unit { PUnit $1 }
| C_False { PFalse $1 }
| C_True { PTrue $1 }
| C_None { PNone $1 }
| list_patt { PList $1 }
| tuple_patt { PTuple $1 }
| C_Some par(core_pattern) {
let region = cover $1 $2.region
in PSome {region; value = $1,$2}
}
list_patt:
brackets(sepseq(core_pattern,COMMA)) { Sugar $1 }
| par(cons_pattern) { Raw $1 }
cons_pattern:
core_pattern CONS pattern { $1,$2,$3 }
tuple_patt:
par(nsepseq(core_pattern,COMMA)) { $1 }

View File

@ -1,118 +0,0 @@
(* Driver for the parser of LIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
let sprintf = Printf.sprintf
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
type Error.t += ParseError
let error_to_string = function
ParseError -> "Syntax error.\n"
| _ -> assert false
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg)
(* Path for CPP inclusions (#include) *)
let lib_path =
match EvalOpt.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - -o %s"
lib_path pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp%s %s -o %s"
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Instanciating the lexer *)
module Lexer = Lexer.Make (LexToken)
let Lexer.{read; buffer; get_pos; get_last; close} =
Lexer.open_token_stream (Some pp_input)
and cout = stdout
let log = Lexer.output_token ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.cmd cout
and close_all () = close (); close_out cout
(* Tokeniser *)
let tokeniser = read ~log
(* Main *)
let () =
try
let ast = Parser.contract tokeniser buffer in
if Utils.String.Set.mem "ast" EvalOpt.verbose
then AST.print_tokens ast
with
Lexer.Error err ->
close_all ();
Lexer.print_error ~offsets EvalOpt.mode err
| Parser.Error ->
let region = get_last () in
let error = Region.{region; value=ParseError} in
let () = close_all () in
print_error ~offsets EvalOpt.mode error
| Sys_error msg -> Utils.highlight msg
(*
(* Temporary: force dune to build AST2.ml *)
let () =
let open AST2 in
let _ = s_ast in
()
(*
(* Temporary: force dune to build AST2.ml *)
let () =
if false then
let _ = Typecheck2.annotate in
()
else
()
*)
*)

View File

@ -1,138 +0,0 @@
type t = <
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
set_file : string -> t;
set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
new_line : string -> t;
add_nl : t;
shift_bytes : int -> t;
shift_one_uchar : int -> t;
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
is_ghost : bool;
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
let sprintf = Printf.sprintf
let make ~byte ~point_num ~point_bol =
let () = assert (point_num >= point_bol) in
object (self)
val byte = byte
method byte = byte
val point_num = point_num
method point_num = point_num
val point_bol = point_bol
method point_bol = point_bol
method set_file file =
{< byte = Lexing.{byte with pos_fname = file} >}
method set_line line =
{< byte = Lexing.{byte with pos_lnum = line} >}
method set_offset offset =
{< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
method set ~file ~line ~offset =
let pos = self#set_file file in
let pos = pos#set_line line in
let pos = pos#set_offset offset
in pos
(* The string must not contain '\n'. See [new_line]. *)
method shift_bytes len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + len >}
method shift_one_uchar len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + 1 >}
method add_nl =
{< byte = Lexing.{byte with
pos_lnum = byte.pos_lnum + 1;
pos_bol = byte.pos_cnum};
point_bol = point_num >}
method new_line string =
let len = String.length string
in (self#shift_bytes len)#add_nl
method is_ghost = byte = Lexing.dummy_pos
method file = byte.Lexing.pos_fname
method line = byte.Lexing.pos_lnum
method offset = function
`Byte -> Lexing.(byte.pos_cnum - byte.pos_bol)
| `Point -> point_num - point_bol
method column mode = 1 + self#offset mode
method line_offset = function
`Byte -> byte.Lexing.pos_bol
| `Point -> point_bol
method byte_offset = byte.Lexing.pos_cnum
method to_string ?(offsets=true) mode =
let offset = self#offset mode in
let horizontal, value =
if offsets then "character", offset else "column", offset + 1
in sprintf "File \"%s\", line %i, %s %i"
self#file self#line horizontal value
method compact ?(offsets=true) mode =
if self#is_ghost then "ghost"
else
let offset = self#offset mode in
sprintf "%s:%i:%i"
self#file self#line (if offsets then offset else offset + 1)
method anonymous ?(offsets=true) mode =
if self#is_ghost then "ghost"
else sprintf "%i:%i" self#line
(if offsets then self#offset mode else self#column mode)
end
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1)
let min =
let byte = Lexing.{
pos_fname = "";
pos_lnum = 1;
pos_bol = 0;
pos_cnum = 0}
in make ~byte ~point_num:0 ~point_bol:0
(* Comparisons *)
let equal pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset = pos2#byte_offset
let lt pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset < pos2#byte_offset

View File

@ -1,107 +0,0 @@
(* Positions in a file
A position in a file denotes a single unit belonging to it, for
example, in an ASCII text file, it is a particular character within
that file (the unit is the byte in this instance, since in ASCII
one character is encoded with one byte).
Units can be either bytes (as ASCII characters) or, more
generally, unicode points.
The type for positions is the object type [t].
We use here lexing positions to denote byte-oriented positions
(field [byte]), and we manage code points by means of the fields
[point_num] and [point_bol]. These two fields have a meaning
similar to the fields [pos_cnum] and [pos_bol], respectively, from
the standard module [Lexing]. That is to say, [point_num] holds the
number of code points since the beginning of the file, and
[point_bol] the number of code points since the beginning of the
current line.
The name of the file is given by the field [file], and the line
number by the field [line].
*)
type t = <
(* Payload *)
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
(* Setters *)
set_file : string -> t;
set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
(* The call [pos#new_line s], where the string [s] is either "\n" or
"\c\r", updates the position [pos] with a new line. *)
new_line : string -> t;
add_nl : t;
(* The call [pos#shift_bytes n] evaluates in a position that is the
translation of position [pos] of [n] bytes forward in the
file. The call [pos#shift_one_uchar n] is similar, except that it
assumes that [n] is the number of bytes making up one unicode
point. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
(* The call [pos#offset `Byte] provides the horizontal offset of the
position [pos] in bytes. (An offset is the number of units, like
bytes, since the beginning of the current line.) The call
[pos#offset `Point] is the offset counted in number of unicode
points.
The calls to the method [column] are similar to those to
[offset], except that they give the curren column number.
The call [pos#line_offset `Byte] is the offset of the line of
position [pos], counted in bytes. Dually, [pos#line_offset
`Point] counts the same offset in code points.
The call [pos#byte_offset] is the offset of the position [pos]
since the begininng of the file, counted in bytes.
*)
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
val make : byte:Lexing.position -> point_num:int -> point_bol:int -> t
(* Special positions *)
val ghost : t (* Same as [Lexing.dummy_pos] *)
val min : t (* Lexing convention: line 1, offsets to 0 and file to "". *)
(* Comparisons *)
val equal : t -> t -> bool
val lt : t -> t -> bool

View File

@ -1,128 +0,0 @@
(* Regions of a file *)
let sprintf = Printf.sprintf
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
type region = t
type 'a reg = {region: t; value: 'a}
(* Injections *)
exception Invalid
let make ~(start: Pos.t) ~(stop: Pos.t) =
if start#file <> stop#file || start#byte_offset > stop#byte_offset
then raise Invalid
else
object
val start = start
method start = start
val stop = stop
method stop = stop
method shift_bytes len =
let start = start#shift_bytes len
and stop = stop#shift_bytes len
in {< start = start; stop = stop >}
method shift_one_uchar len =
let start = start#shift_one_uchar len
and stop = stop#shift_one_uchar len
in {< start = start; stop = stop >}
method set_file name =
let start = start#set_file name
and stop = stop#set_file name
in {< start = start; stop = stop >}
(* Getters *)
method file = start#file
method pos = start, stop
method byte_pos = start#byte, stop#byte
(* Predicates *)
method is_ghost = start#is_ghost && stop#is_ghost
(* Conversions to strings *)
method to_string ?(file=true) ?(offsets=true) mode =
let horizontal = if offsets then "character" else "column"
and start_offset =
if offsets then start#offset mode else start#column mode
and stop_offset =
if offsets then stop#offset mode else stop#column mode in
let info =
if file
then sprintf "in file \"%s\", line %i, %s"
(String.escaped start#file) start#line horizontal
else sprintf "at line %i, %s" start#line horizontal
in if stop#line = start#line
then sprintf "%ss %i-%i" info start_offset stop_offset
else sprintf "%s %i to line %i, %s %i"
info start_offset stop#line horizontal stop_offset
method compact ?(file=true) ?(offsets=true) mode =
let start_str = start#anonymous ~offsets mode
and stop_str = stop#anonymous ~offsets mode in
if start#file = stop#file then
if file then sprintf "%s:%s-%s" start#file start_str stop_str
else sprintf "%s-%s" start_str stop_str
else sprintf "%s:%s-%s:%s" start#file start_str stop#file stop_str
end
(* Special regions *)
let ghost = make ~start:Pos.ghost ~stop:Pos.ghost
let min = make ~start:Pos.min ~stop:Pos.min
(* Comparisons *)
let equal r1 r2 =
r1#file = r2#file
&& Pos.equal r1#start r2#start
&& Pos.equal r1#stop r2#stop
let lt r1 r2 =
r1#file = r2#file
&& not r1#is_ghost
&& not r2#is_ghost
&& Pos.lt r1#start r2#start
&& Pos.lt r1#stop r2#stop
let cover r1 r2 =
if r1#is_ghost
then r2
else if r2#is_ghost
then r1
else if lt r1 r2
then make ~start:r1#start ~stop:r2#stop
else make ~start:r2#start ~stop:r1#stop

View File

@ -1,125 +0,0 @@
(* Regions of a file
A _region_ is a contiguous series of bytes, for example, in a text
file. It is here denoted by the object type [t].
The start (included) of the region is given by the field [start],
which is a _position_, and the end (excluded) is the position given
by the field [stop]. The convention of including the start and
excluding the end enables to have empty regions if, and only if,
[start = stop]. See module [Pos] for the definition of positions.
The first byte of a file starts at the offset zero (that is,
column one), and [start] is always lower than or equal to [stop],
and they must refer to the same file.
*)
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
(* The call [region#shift_bytes n] evaluates in a region that is the
translation of region [region] of [n] bytes forward in the
file. The call [region#shift_one_uchar n] is similar, except that
it assumes that [n] is the number of bytes making up one unicode
point. The call [region#set_file f] sets the file name to be
[f]. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *)
(* The method [file] returns the file name.
The method [pos] returns the values of the fields [start] and [stop].
The method [byte_pos] returns the start and end positions of the
region at hand _interpreting them as lexing positions_, that is,
the unit is the byte. *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
(* The call [region#to_string ~file ~offsets mode] evaluates in a
string denoting the region [region].
The name of the file is present if, and only if, [file = true] or
[file] is missing.
The positions in the file are expressed horizontal offsets if
[offsets = true] or [offsets] is missing (the default), otherwise
as columns.
If [mode = `Byte], those positions will be assumed to have bytes
as their unit, otherwise, if [mode = `Point], they will be
assumed to refer to code points.
The method [compact] has the same signature and calling
convention as [to_string], except that the resulting string is
more compact.
*)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
type region = t
type 'a reg = {region: t; value: 'a}
(* Constructors *)
(* The function [make] creates a region from two positions. If the
positions are not properly ordered or refer to different files, the
exception [Invalid] is raised. *)
exception Invalid
val make : start:Pos.t -> stop:Pos.t -> t
(* Special regions *)
(* To deal with ghost expressions, that is, pieces of abstract syntax
that have not been built from excerpts of concrete syntax, we need
_ghost regions_. The module [Pos] provides a [ghost] position, and
we also provide a [ghost] region and, in type [t], the method
[is_ghost] to check it. *)
val ghost : t (* Two [Pos.ghost] positions *)
(* Occasionnally, we may need a minimum region. It is here made of two
minimal positions. *)
val min : t (* Two [Pos.min] positions *)
(* Comparisons *)
(* Two regions are equal if, and only if, they refer to the same file
and their start positions are equal and their stop positions are
equal. See [Pos.equal]. Note that [r1] and [r2] can be ghosts. *)
val equal : t -> t -> bool
(* The call [lt r1 r2] ("lower than") has the value [true] if, and
only if, regions [r1] and [r2] refer to the same file, none is a
ghost and the start position of [r1] is lower than that of
[r2]. (See [Pos.lt].) *)
val lt : t -> t -> bool
(* Given two regions [r1] and [r2], we may want the region [cover r1
r2] that covers [r1] and [r2]. We property [equal (cover r1 r2)
(cover r2 r1)]. (In a sense, it is the maximum region, but we avoid
that name because of the [min] function above.) If [r1] is a ghost,
the cover is [r2], and if [r2] is a ghost, the cover is [r1]. *)
val cover : t -> t -> t

View File

@ -1,29 +0,0 @@
type t is int * string
type u is t
type v is record foo: key; bar: mutez; baz: address end
type w is K of (U of int) (*v * u*)
storage s : w // Line comment
operations o : u;
type i is int;
(* Block comment *)
entrypoint g (const l : list (int)) is
function f (const x : int) : int is
var y : int := 5 - x
const z : int = 6
begin
y := x + y
end with y * 2
begin
match l with
[] -> null
| h#t -> q (h+2)
end;
begin
g (Unit);
fail "in extremis"
end
end

View File

@ -1,157 +0,0 @@
(* Utility types and functions *)
(* Identity *)
let id x = x
(* Combinators *)
let (<@) f g x = f (g x)
let swap f x y = f y x
let lambda = fun x _ -> x
let curry f x y = f (x,y)
let uncurry f (x,y) = f x y
(* Parametric rules for sequences *)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
let nseq_cons x (hd,tl) = x, hd::tl
let nsepseq_cons x sep (hd,tl) = x, (sep,hd)::tl
let sepseq_cons x sep = function
None -> x, []
| Some (hd,tl) -> x, (sep,hd)::tl
(* Rightwards iterators *)
let nseq_foldl f a (hd,tl) = List.fold_left f a (hd::tl)
let nsepseq_foldl f a (hd,tl) =
List.fold_left (fun a (_,e) -> f a e) (f a hd) tl
let sepseq_foldl f a = function
None -> a
| Some s -> nsepseq_foldl f a s
let nseq_iter f (hd,tl) = List.iter f (hd::tl)
let nsepseq_iter f (hd,tl) = f hd; List.iter (f <@ snd) tl
let sepseq_iter f = function
None -> ()
| Some s -> nsepseq_iter f s
(* Reversing *)
let nseq_rev (hd,tl) =
let rec aux acc = function
[] -> acc
| x::l -> aux (nseq_cons x acc) l
in aux (hd,[]) tl
let nsepseq_rev =
let rec aux acc = function
hd, (sep,snd)::tl -> aux ((sep,hd)::acc) (snd,tl)
| hd, [] -> hd, acc in
function
hd, (sep,snd)::tl -> aux [sep,hd] (snd,tl)
| s -> s
let sepseq_rev = function
None -> None
| Some seq -> Some (nsepseq_rev seq)
(* Leftwards iterators *)
let nseq_foldr f (hd,tl) = List.fold_right f (hd::tl)
let nsepseq_foldr f (hd,tl) a = f hd (List.fold_right (f <@ snd) tl a)
let sepseq_foldr f = function
None -> fun a -> a
| Some s -> nsepseq_foldr f s
(* Conversions to lists *)
let nseq_to_list (x,y) = x::y
let nsepseq_to_list (x,y) = x :: List.map snd y
let sepseq_to_list = function
None -> []
| Some s -> nsepseq_to_list s
(* Optional values *)
module Option =
struct
let apply f x =
match x with
Some y -> Some (f y)
| None -> None
let rev_apply x y =
match x with
Some f -> f y
| None -> y
let to_string = function
Some x -> x
| None -> ""
end
(* Modules based on [String], like sets and maps. *)
module String =
struct
include String
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Integers *)
module Int =
struct
type t = int
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Effectful symbol generator *)
let gen_sym =
let counter = ref 0 in
fun () -> incr counter; "v" ^ string_of_int !counter
(* General tracing function *)
let trace text = function
None -> ()
| Some chan -> output_string chan text; flush chan
(* Printing a string in red to standard error *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg

View File

@ -1,97 +0,0 @@
(* Utility types and functions *)
(* Polymorphic identity function *)
val id : 'a -> 'a
(* Combinators *)
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
val lambda : 'a -> 'b -> 'a
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
(* Parametric rules for sequences
nseq: non-empty sequence;
sepseq: (possibly empty) sequence of separated items;
nsepseq: non-empty sequence of separated items.
*)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
val nseq_cons : 'a -> 'a nseq -> 'a nseq
val nsepseq_cons : 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_cons : 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
(* Reversing *)
val nseq_rev: 'a nseq -> 'a nseq
val nsepseq_rev: ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_rev: ('a,'sep) sepseq -> ('a,'sep) sepseq
(* Rightwards iterators *)
val nseq_foldl : ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
val nsepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
val sepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
val nseq_iter : ('a -> unit) -> 'a nseq -> unit
val nsepseq_iter : ('a -> unit) -> ('a,'b) nsepseq -> unit
val sepseq_iter : ('a -> unit) -> ('a,'b) sepseq -> unit
(* Leftwards iterators *)
val nseq_foldr : ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
val nsepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
val sepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
(* Conversions to lists *)
val nseq_to_list : 'a nseq -> 'a list
val nsepseq_to_list : ('a,'b) nsepseq -> 'a list
val sepseq_to_list : ('a,'b) sepseq -> 'a list
(* Effectful symbol generator *)
val gen_sym : unit -> string
(* General tracing function *)
val trace : string -> out_channel option -> unit
(* Printing a string in red to standard error *)
val highlight : string -> unit
(* Working with optional values *)
module Option :
sig
val apply : ('a -> 'b) -> 'a option -> 'b option
val rev_apply : ('a -> 'a) option -> 'a -> 'a
val to_string : string option -> string
end
(* An extension to the standard module [String] *)
module String :
sig
include module type of String
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end
(* Integer maps *)
module Int :
sig
type t = int
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end

View File

@ -1,10 +0,0 @@
#!/bin/sh
set -e
if test -d ../../.git; then
echo true > dot_git_is_dir
else
echo false > dot_git_is_dir
cat .git >> dot_git_is_dir
fi

View File

@ -1,34 +0,0 @@
(ocamllex LexToken)
(ocamllex Lexer)
(menhir
(merge_into Parser)
(modules ParToken Parser)
(flags -la 1 --explain --external-tokens LexToken))
(library
(name ligo_parser)
(public_name ligo-parser)
(modules_without_implementation Error)
(libraries getopt hex str uutf zarith))
;; Les deux directives (rule) qui suivent sont pour le dev local.
;; Il suffit de faire "dune build Parser.exe" pour avoir un Parser.exe dans le dossier.
;; Pour le purger, il faut faire "dune clean".
(rule
(targets Parser.exe)
(deps ParserMain.exe)
(action (copy ParserMain.exe Parser.exe))
(mode promote-until-clean))
(rule
(targets Lexer.exe)
(deps LexerMain.exe)
(action (copy LexerMain.exe Lexer.exe))
(mode promote-until-clean))
(rule
(targets Version.ml)
(action
(progn (run "sh" "-c" "printf 'let version = \"%s\"'\\\\n \"$(echo UNKNOWN)\" > Version.ml")))
(mode promote-until-clean))

View File

@ -1,19 +0,0 @@
opam-version : "2.0"
version : "1.0"
maintainer : "gabriel.alfour@gmail.com"
authors : [ "Galfour" ]
homepage : "https://gitlab.com/gabriel.alfour/ligo-parser"
bug-reports : "https://gitlab.com/gabriel.alfour/ligo-parser/issues"
dev-repo : "git+https://gitlab.com/gabriel.alfour/ligo-parser.git"
license : "MIT"
depends : [ "dune" "menhir" "hex" "zarith" "getopt" "uutf" ]
build : [
[ "sh" "-c" "printf 'let version = \"%s\"' \"$(git describe --always --dirty --abbrev=0)\" > Version.ml" ]
[ "dune" "build" "-p" name "-j" jobs ]
]
url {
src: "https://gitlab.com/gabriel.alfour/ligo-parser/-/archive/master/ligo-parser.tar.gz"
}

View File

@ -1,3 +0,0 @@
module Parser = Parser
module Lexer = Lexer.Make(LexToken)
module AST = AST

View File

@ -1,229 +0,0 @@
(* module I = AST (\* In *\) *)
(* module SMap = Map.Make(String) *)
(* type te = type_expr list SMap.t *)
(* type ve = type_expr list SMap.t *)
(* type tve = te * ve *)
(*
module I = AST (* In *)
module SMap = Map.Make(String)
module O = struct
open AST (* TODO: for now, should disappear *)
type t = ast
and type_expr =
Prod of cartesian
| Sum of (variant, vbar) Utils.nsepseq
| Record of record_type
| TypeApp of (type_name * type_tuple)
| ParType of type_expr par
| TAlias of variable
| Function of (type_expr list) * type_expr
| Mutable of type_expr
| Unit
| TODO of string
and te = type_expr list SMap.t
and ve = type_expr list SMap.t
and vte = ve * te
and ast = {
lambdas : lambda_decl list;
block : block
}
and lambda_decl =
FunDecl of fun_decl
| ProcDecl of proc_decl
and fun_decl = {
kwd_function : kwd_function;
var : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
body : block;
kwd_with : kwd_with;
return : checked_expr
}
and proc_decl = {
kwd_procedure : kwd_procedure;
var : variable;
param : parameters;
kwd_is : kwd_is;
body : block
}
and block = {
decls : value_decls;
opening : kwd_begin;
instr : instructions;
close : kwd_end
}
and value_decls = var_decl list
and var_decl = {
kind : var_kind;
var : variable;
colon : colon;
vtype : type_expr;
setter : Region.t; (* "=" or ":=" *)
init : checked_expr
}
and checked_expr = {ty:type_expr;expr:expr}
end [@warning "-30"]
open O
open AST
open Region
let mk_checked_expr ~ty ~expr = {ty;expr}
let mk_proc_decl ~kwd_procedure ~var ~param ~kwd_is ~body =
O.{kwd_procedure; var; param; kwd_is; body}
let mk_ast ~lambdas ~block = {lambdas;block}
let mk_fun_decl ~kwd_function ~var ~param ~colon ~ret_type ~kwd_is ~body ~kwd_with ~return =
O.{kwd_function; var; param; colon; ret_type; kwd_is; body; kwd_with; return}
let unreg : 'a reg -> 'a = fun {value; _} -> value
let unpar : 'a par -> 'a = (fun (_left_par, x, _right_par) -> x) @. unreg
let nsepseq_to_list : ('a,'sep) Utils.nsepseq -> 'a list =
fun (first, rest) -> first :: (map snd rest)
let sepseq_to_list : ('a,'sep) Utils.sepseq -> 'a list =
function
None -> []
| Some nsepseq -> nsepseq_to_list nsepseq
let rec xty : I.type_expr -> O.type_expr =
function
I.Prod x -> O.Prod x
| I.Sum x -> O.Sum (unreg x)
| I.Record x -> O.Record x
| I.TypeApp x -> O.TypeApp (unreg x)
| I.ParType {region;value=(l,x,r)} -> O.ParType {region;value=(l, xty x, r)}
| I.TAlias x -> O.TAlias x
let shadow (name : string) (typ : O.type_expr) (env : O.type_expr list SMap.t)
: O.type_expr list SMap.t =
SMap.update name (function None -> Some [typ] | Some tl -> Some (typ :: tl)) env
let shadow_list (name_typ_list : (string * O.type_expr) list) (env : O.type_expr list SMap.t)
: O.type_expr list SMap.t =
List.fold_left (fun acc (name, typ) -> shadow name typ acc) env name_typ_list
let type_decls_to_tenv (td : I.type_decl list) (te : te) : O.te =
td
|> List.map unreg
|> List.map (fun (_, name, _, type_expr) -> (unreg name, xty type_expr))
|> fun up -> shadow_list up te
let var_kind_to_ty : var_kind -> I.type_expr -> O.type_expr =
fun var_kind ty ->
match var_kind with
Mutable _ -> O.Mutable (xty ty)
| Const _ -> xty ty
let params_to_xty params ret_type =
unpar params
|> nsepseq_to_list
|> map (fun {value=(var_kind, _variable, _colon, type_expr);_} -> var_kind_to_ty var_kind type_expr)
|> fun param_types -> O.Function (param_types, ret_type)
let type_equal t1 t2 = match t1,t2 with
| O.Prod _x, O.Prod _y -> true (* TODO *)
| O.Sum _x, O.Sum _y -> true (* TODO *)
| _ -> false
exception TypeError of string
let check_type expr expected_type =
if type_equal expr.ty expected_type then expr
else raise (TypeError "oops")
let tc_expr (_te,_ve) expr = mk_checked_expr ~ty:(TODO "all expressions") ~expr (* TODO *)
let tc_var_decl : vte -> I.var_decl -> vte * O.var_decl =
fun (ve,te) var_decl ->
let vtype = (xty var_decl.vtype) in
let init = check_type (tc_expr (te,ve) var_decl.init) vtype in
let ve = shadow (unreg var_decl.var) vtype ve in
(ve,te), {
kind = var_decl.kind;
var = var_decl.var;
colon = var_decl.colon;
vtype;
setter = var_decl.setter;
init}
let tc_var_decls (ve,te) var_decls = fold_map tc_var_decl (ve,te) var_decls
let tc_block (te, ve : vte) (block : I.block) : vte * O.block =
let decls,opening,instr,close = block.decls, block.opening, block.instr, block.close in
let (ve,te), decls = tc_var_decls (ve,te) (decls |> unreg |> sepseq_to_list |> map unreg) in
(ve,te), O.{decls;opening;instr;close} (* TODO *)
let tc_proc_decl : vte -> I.proc_decl -> O.proc_decl =
fun vte proc_decl ->
let _vte', block' = tc_block vte (unreg proc_decl.body)
in mk_proc_decl
~kwd_procedure: proc_decl.kwd_procedure
~kwd_is: proc_decl.kwd_is
~var: proc_decl.var
~param: proc_decl.param
~body: block'
let tc_fun_decl : vte -> I.fun_decl -> O.fun_decl =
fun vte fun_decl ->
let vte', block' = tc_block vte (unreg fun_decl.body) in
let return' = tc_expr vte' fun_decl.return in
let checked_return' = check_type return' (xty fun_decl.ret_type)
in mk_fun_decl
~kwd_function: fun_decl.kwd_function
~colon: fun_decl.colon
~kwd_is: fun_decl.kwd_is
~kwd_with: fun_decl.kwd_with
~var: fun_decl.var
~param: fun_decl.param
~ret_type: (xty fun_decl.ret_type)
~body: block'
~return: checked_return'
let ve_lambda_decl : vte -> I.lambda_decl -> ve =
fun (ve,_te) ->
function
FunDecl {value;_} -> shadow value.var.value (params_to_xty value.param (xty value.ret_type)) ve
| ProcDecl {value;_} -> shadow value.var.value (params_to_xty value.param Unit) ve
let tc_lambda_decl (ve, te : vte) (whole : I.lambda_decl) : vte * O.lambda_decl =
match whole with
FunDecl {value;_} -> ((ve_lambda_decl (ve, te) whole), te), O.FunDecl (tc_fun_decl (ve, te) value)
| ProcDecl {value;_} -> ((ve_lambda_decl (ve, te) whole), te), O.ProcDecl (tc_proc_decl (ve, te) value)
let tc_ast (ast : I.ast) : O.ast =
(* te is the type environment, ve is the variable environment *)
let te =
SMap.empty
|> type_decls_to_tenv ast.types in
let ve =
SMap.empty
|> (match ast.parameter.value with (_,name,_,ty) -> shadow (unreg name) @@ xty ty)
|> shadow "storage" @@ xty (snd ast.storage.value)
|> shadow "operations" @@ xty (snd ast.operations.value)
in
let (ve',te'), lambdas = fold_map tc_lambda_decl (ve, te) ast.lambdas in
let (ve'', te''), block = tc_block (ve', te') (unreg ast.block) in
let _ve'' = ve'' in (* not needed anymore *)
let _te'' = te'' in (* not needed anymore *)
mk_ast ~lambdas ~block
*)