Merge branch 'feature/ligodity_shared_parser' into 'dev'

Refactor Ligodity parser to use shared parser.

See merge request ligolang/ligo!65
This commit is contained in:
Christian Rinderknecht 2019-09-27 13:33:25 +00:00
commit 85f267540b
41 changed files with 2050 additions and 1984 deletions

View File

@ -1,26 +1,27 @@
open Trace open Trace
open Parser_ligodity
module Parser = Parser_ligodity.Parser module Parser = Parser_ligodity.Parser
module AST = Parser_ligodity.AST module AST = Parser_ligodity.AST
module ParserLog = Parser_ligodity.ParserLog
module LexToken = Parser_ligodity.LexToken
module Lexer = Lexer.Make(LexToken)
let parse_file (source: string) : AST.t result = let parse_file (source: string) : AST.t result =
(* let pp_input =
* let prefix = Filename.(source |> basename |> remove_extension)
* and suffix = ".pp.ligo"
* in prefix ^ suffix in *)
(* let cpp_cmd = Printf.sprintf "cpp -traditional-cpp %s > %s"
* source pp_input in
* let%bind () = sys_command cpp_cmd in *)
let pp_input = let pp_input =
source let prefix = Filename.(source |> basename |> remove_extension)
in and suffix = ".pp.mligo"
in prefix ^ suffix in
let cpp_cmd = Printf.sprintf "cpp -traditional-cpp %s > %s"
source pp_input in
let%bind () = sys_command cpp_cmd in
let%bind channel = let%bind channel =
generic_try (simple_error "error opening file") @@ generic_try (simple_error "error opening file") @@
(fun () -> open_in pp_input) in (fun () -> open_in pp_input) in
let lexbuf = Lexing.from_channel channel in let lexbuf = Lexing.from_channel channel in
let read = Lexer.get_token in let Lexer.{read ; close ; _} =
Lexer.open_token_stream None in
specific_try (function specific_try (function
| Parser.Error -> ( | Parser.Error -> (
let start = Lexing.lexeme_start_p lexbuf in let start = Lexing.lexeme_start_p lexbuf in
@ -34,19 +35,6 @@ let parse_file (source: string) : AST.t result =
in in
simple_error str simple_error str
) )
| Lexer.Error err -> (
let start = Lexing.lexeme_start_p lexbuf in
let end_ = Lexing.lexeme_end_p lexbuf in
let str = Format.sprintf
"Lexer error (%s) at \"%s\" from (%d, %d) to (%d, %d). In file \"%s|%s\"\n"
(err.value)
(Lexing.lexeme lexbuf)
start.pos_lnum (start.pos_cnum - start.pos_bol)
end_.pos_lnum (end_.pos_cnum - end_.pos_bol)
start.pos_fname source
in
simple_error str
)
| exn -> | exn ->
let start = Lexing.lexeme_start_p lexbuf in let start = Lexing.lexeme_start_p lexbuf in
let end_ = Lexing.lexeme_end_p lexbuf in let end_ = Lexing.lexeme_end_p lexbuf in
@ -59,13 +47,17 @@ let parse_file (source: string) : AST.t result =
start.pos_fname source start.pos_fname source
in in
simple_error str simple_error str
) @@ (fun () -> Parser.program read lexbuf) >>? fun raw -> ) @@ (fun () ->
let raw = Parser.contract read lexbuf in
close () ;
raw
) >>? fun raw ->
ok raw ok raw
let parse_string (s:string) : AST.t result = let parse_string (s:string) : AST.t result =
let lexbuf = Lexing.from_string s in let lexbuf = Lexing.from_string s in
let read = Lexer.get_token in let Lexer.{read ; close ; _} =
Lexer.open_token_stream None in
specific_try (function specific_try (function
| Parser.Error -> ( | Parser.Error -> (
let start = Lexing.lexeme_start_p lexbuf in let start = Lexing.lexeme_start_p lexbuf in
@ -78,12 +70,17 @@ let parse_string (s:string) : AST.t result =
simple_error str simple_error str
) )
| _ -> simple_error "unrecognized parse_ error" | _ -> simple_error "unrecognized parse_ error"
) @@ (fun () -> Parser.program read lexbuf) >>? fun raw -> ) @@ (fun () ->
let raw = Parser.contract read lexbuf in
close () ;
raw
) >>? fun raw ->
ok raw ok raw
let parse_expression (s:string) : AST.expr result = let parse_expression (s:string) : AST.expr result =
let lexbuf = Lexing.from_string s in let lexbuf = Lexing.from_string s in
let read = Lexer.get_token in let Lexer.{read ; close; _} =
Lexer.open_token_stream None in
specific_try (function specific_try (function
| Parser.Error -> ( | Parser.Error -> (
let start = Lexing.lexeme_start_p lexbuf in let start = Lexing.lexeme_start_p lexbuf in
@ -107,5 +104,9 @@ let parse_expression (s:string) : AST.expr result =
start.pos_fname s start.pos_fname s
in in
simple_error str simple_error str
) @@ (fun () -> Parser.interactive_expr read lexbuf) >>? fun raw -> ) @@ (fun () ->
let raw = Parser.interactive_expr read lexbuf in
close () ;
raw
) >>? fun raw ->
ok raw ok raw

View File

@ -4,4 +4,18 @@ $HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml $HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml
$HOME/git/ligo/src/parser/shared/Lexer.mli
$HOME/git/ligo/src/parser/shared/Lexer.mll
$HOME/git/ligo/src/parser/shared/Error.mli
$HOME/git/ligo/src/parser/shared/EvalOpt.ml
$HOME/git/ligo/src/parser/shared/EvalOpt.mli
$HOME/git/ligo/src/parser/shared/FQueue.ml
$HOME/git/ligo/src/parser/shared/FQueue.mli
$HOME/git/ligo/src/parser/shared/LexerLog.mli
$HOME/git/ligo/src/parser/shared/LexerLog.ml
$HOME/git/ligo/src/parser/shared/Markup.ml
$HOME/git/ligo/src/parser/shared/Markup.mli
$HOME/git/ligo/src/parser/shared/Utils.mli
$HOME/git/ligo/src/parser/shared/Utils.ml
$HOME/git/ligo/src/parser/shared/Version.ml
Stubs/Simple_utils.ml Stubs/Simple_utils.ml

View File

@ -4,6 +4,15 @@
type 'a reg = 'a Region.reg type 'a reg = 'a Region.reg
let rec last to_region = function
[] -> Region.ghost
| [x] -> to_region x
| _::t -> last to_region t
let nsepseq_to_region to_region (hd,tl) =
let reg (_, item) = to_region item in
Region.cover (to_region hd) (last reg tl)
(* Keywords of OCaml *) (* Keywords of OCaml *)
type keyword = Region.t type keyword = Region.t
@ -136,7 +145,7 @@ and type_expr =
| TSum of (variant reg, vbar) Utils.nsepseq reg | TSum of (variant reg, vbar) Utils.nsepseq reg
| TRecord of record_type | TRecord of record_type
| TApp of (type_constr * type_tuple) reg | TApp of (type_constr * type_tuple) reg
| TFun of (type_expr * arrow * type_expr) reg | TFun of (type_expr * arrow * type_expr) reg
| TPar of type_expr par reg | TPar of type_expr par reg
| TAlias of variable | TAlias of variable
@ -344,8 +353,6 @@ and conditional = {
(* Projecting regions of the input source code *) (* Projecting regions of the input source code *)
let sprintf = Printf.sprintf
let type_expr_to_region = function let type_expr_to_region = function
TProd {region; _} TProd {region; _}
| TSum {region; _} | TSum {region; _}
@ -406,358 +413,6 @@ let expr_to_region = function
| ESeq {region; _} | ERecord {region; _} | ESeq {region; _} | ERecord {region; _}
| EConstr {region; _} -> region | EConstr {region; _} -> region
(* Printing the tokens with their source locations *)
let print_nsepseq sep print (head,tail) =
let print_aux ((sep_reg:Region.t), item) =
Printf.printf "%s: %s\n" (sep_reg#compact `Byte) sep;
print item
in print head; List.iter print_aux tail
let print_sepseq sep print = function
None -> ()
| Some seq -> print_nsepseq sep print seq
let print_csv print = print_nsepseq "," print
let print_token (reg: Region.t) conc =
Printf.printf "%s: %s\n" (reg#compact `Byte) conc
let print_var Region.{region; value} =
Printf.printf "%s: Ident %s\n" (region#compact `Byte) value
let print_uident Region.{region; value} =
Printf.printf "%s: Uident %s\n" (region#compact `Byte) value
let print_str Region.{region; value} =
Printf.printf "%s: Str \"%s\"\n" (region#compact `Byte) value
let print_bytes Region.{region; value=lexeme, abstract} =
Printf.printf "%s: Bytes (\"%s\", \"0x%s\")\n"
(region#compact `Byte) lexeme (Hex.to_string abstract)
let rec print_tokens {decl;eof} =
Utils.nseq_iter print_statement decl; print_token eof "EOF"
and print_statement = function
Let {value=kwd_let, let_binding; _} ->
print_token kwd_let "let";
print_let_binding let_binding
| LetEntry {value=kwd_let_entry, let_binding; _} ->
print_token kwd_let_entry "let%entry";
print_let_binding let_binding
| TypeDecl {value={kwd_type; name; eq; type_expr}; _} ->
print_token kwd_type "type";
print_var name;
print_token eq "=";
print_type_expr type_expr
and print_type_expr = function
TProd prod -> print_cartesian prod
| TSum {value; _} -> print_nsepseq "|" print_variant value
| TRecord t -> print_record_type t
| TApp app -> print_type_app app
| TPar par -> print_type_par par
| TAlias var -> print_var var
| TFun t -> print_fun_type t
and print_fun_type {value; _} =
let domain, arrow, range = value in
print_type_expr domain;
print_token arrow "->";
print_type_expr range
and print_type_app {value; _} =
let type_constr, type_tuple = value in
print_type_tuple type_tuple;
print_var type_constr
and print_type_tuple {value; _} =
let {lpar; inside; rpar} = value in
print_token lpar "(";
print_nsepseq "," print_type_expr inside;
print_token rpar ")"
and print_type_par {value={lpar;inside=t;rpar}; _} =
print_token lpar "(";
print_type_expr t;
print_token rpar ")"
and print_projection node =
let {struct_name; selector; field_path} = node in
print_var struct_name;
print_token selector ".";
print_nsepseq "." print_selection field_path
and print_selection = function
FieldName id -> print_var id
| Component {value; _} ->
let {lpar; inside; rpar} = value in
let Region.{value=lexeme,z; region} = inside in
print_token lpar "(";
print_token region
(sprintf "Int %s (%s)" lexeme (Z.to_string z));
print_token rpar ")"
and print_cartesian Region.{value;_} =
print_nsepseq "*" print_type_expr value
and print_variant {value = {constr; args}; _} =
print_uident constr;
match args with
None -> ()
| Some (kwd_of, cartesian) ->
print_token kwd_of "of";
print_cartesian cartesian
and print_record_type record_type =
print_injection print_field_decl record_type
and print_field_decl {value; _} =
let {field_name; colon; field_type} = value
in print_var field_name;
print_token colon ":";
print_type_expr field_type
and print_injection :
'a.('a -> unit) -> 'a injection reg -> unit =
fun print {value; _} ->
let {opening; elements; terminator; closing} = value in
print_opening opening;
print_sepseq ";" print elements;
print_terminator terminator;
print_closing closing
and print_opening = function
Begin region -> print_token region "begin"
| With region -> print_token region "with"
| LBrace region -> print_token region "{"
| LBracket region -> print_token region "["
and print_closing = function
End region -> print_token region "end"
| RBrace region -> print_token region "}"
| RBracket region -> print_token region "]"
and print_terminator = function
Some semi -> print_token semi ";"
| None -> ()
and print_let_binding {bindings; lhs_type; eq; let_rhs} =
List.iter print_pattern bindings;
(match lhs_type with
None -> ()
| Some (colon, type_expr) ->
print_token colon ":";
print_type_expr type_expr);
(print_token eq "="; print_expr let_rhs)
and print_pattern = function
PTuple {value=patterns;_} -> print_csv print_pattern patterns
| PList p -> print_list_pattern p
| PVar {region; value} ->
Printf.printf "%s: PVar %s\n" (region#compact `Byte) value
| PUnit {value=lpar,rpar; _} ->
print_token lpar "("; print_token rpar ")"
| PInt {region; value=lex,z} ->
print_token region (sprintf "PInt %s (%s)" lex (Z.to_string z))
| PTrue kwd_true -> print_token kwd_true "true"
| PFalse kwd_false -> print_token kwd_false "false"
| PString s -> print_str s
| PWild wild -> print_token wild "_"
| PPar {value={lpar;inside=p;rpar}; _} ->
print_token lpar "("; print_pattern p; print_token rpar ")"
| PConstr p -> print_constr_pattern p
| PRecord r -> print_record_pattern r
| PTyped t -> print_typed_pattern t
and print_list_pattern = function
Sugar p -> print_injection print_pattern p
| PCons p -> print_raw p
and print_raw {value=p1,c,p2; _} =
print_pattern p1; print_token c "::"; print_pattern p2
and print_typed_pattern {value; _} =
let {pattern; colon; type_expr} = value in
print_pattern pattern;
print_token colon ":";
print_type_expr type_expr
and print_record_pattern record_pattern =
print_injection print_field_pattern record_pattern
and print_field_pattern {value; _} =
let {field_name; eq; pattern} = value in
print_var field_name;
print_token eq "=";
print_pattern pattern
and print_constr_pattern {value=constr, p_opt; _} =
print_uident constr;
match p_opt with
None -> ()
| Some pattern -> print_pattern pattern
and print_expr = function
ELetIn {value;_} -> print_let_in value
| ECond cond -> print_conditional cond
| ETuple {value;_} -> print_csv print_expr value
| ECase {value;_} -> print_match_expr value
| EFun e -> print_fun_expr e
| EAnnot e -> print_annot_expr e
| ELogic e -> print_logic_expr e
| EArith e -> print_arith_expr e
| EString e -> print_string_expr e
| ECall {value=f,l; _} ->
print_expr f; Utils.nseq_iter print_expr l
| EVar v -> print_var v
| EProj p -> print_projection p.value
| EUnit {value=lpar,rpar; _} ->
print_token lpar "("; print_token rpar ")"
| EBytes b -> print_bytes b
| EPar {value={lpar;inside=e;rpar}; _} ->
print_token lpar "("; print_expr e; print_token rpar ")"
| EList e -> print_list_expr e
| ESeq seq -> print_sequence seq
| ERecord e -> print_record_expr e
| EConstr {value=constr,None; _} -> print_uident constr
| EConstr {value=(constr, Some arg); _} ->
print_uident constr; print_expr arg
and print_annot_expr {value=e,t; _} =
print_expr e;
print_token Region.ghost ":";
print_type_expr t
and print_list_expr = function
Cons {value={arg1;op;arg2}; _} ->
print_expr arg1;
print_token op "::";
print_expr arg2
| List e -> print_injection print_expr e
(*| Append {value=e1,append,e2; _} ->
print_expr e1;
print_token append "@";
print_expr e2 *)
and print_arith_expr = function
Add {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "+"; print_expr arg2
| Sub {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "-"; print_expr arg2
| Mult {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "*"; print_expr arg2
| Div {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "/"; print_expr arg2
| Mod {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "mod"; print_expr arg2
| Neg {value={op;arg}; _} -> print_token op "-"; print_expr arg
| Int {region; value=lex,z} ->
print_token region (sprintf "Int %s (%s)" lex (Z.to_string z))
| Mtz {region; value=lex,z} ->
print_token region (sprintf "Mtz %s (%s)" lex (Z.to_string z))
| Nat {region; value=lex,z} ->
print_token region (sprintf "Nat %s (%s)" lex (Z.to_string z))
and print_string_expr = function
Cat {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "^"; print_expr arg2
| String s -> print_str s
and print_logic_expr = function
BoolExpr e -> print_bool_expr e
| CompExpr e -> print_comp_expr e
and print_bool_expr = function
Or {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "||"; print_expr arg2
| And {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "&&"; print_expr arg2
| Not {value={op;arg}; _} -> print_token op "not"; print_expr arg
| True kwd_true -> print_token kwd_true "true"
| False kwd_false -> print_token kwd_false "false"
and print_comp_expr = function
Lt {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "<"; print_expr arg2
| Leq {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "<="; print_expr arg2
| Gt {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op ">"; print_expr arg2
| Geq {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op ">="; print_expr arg2
| Neq {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "<>"; print_expr arg2
| Equal {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "="; print_expr arg2
and print_record_expr e =
print_injection print_field_assign e
and print_field_assign {value; _} =
let {field_name; assignment; field_expr} = value in
print_var field_name;
print_token assignment "=";
print_expr field_expr
and print_sequence seq = print_injection print_expr seq
and print_match_expr expr =
let {kwd_match; expr; opening;
lead_vbar; cases; closing} = expr in
print_token kwd_match "match";
print_expr expr;
print_opening opening;
print_token_opt lead_vbar "|";
print_cases cases;
print_closing closing
and print_token_opt = function
None -> fun _ -> ()
| Some region -> print_token region
and print_cases {value; _} =
print_nsepseq "|" print_case_clause value
and print_case_clause {value; _} =
let {pattern; arrow; rhs} = value in
print_pattern pattern;
print_token arrow "->";
print_expr rhs
and print_let_in (bind: let_in) =
let {kwd_let; binding; kwd_in; body} = bind in
print_token kwd_let "let";
print_let_binding binding;
print_token kwd_in "in";
print_expr body
and print_fun_expr {value; _} =
let {kwd_fun; params; p_annot; arrow; body} = value in
print_token kwd_fun "fun";
(match p_annot with
None -> List.iter print_pattern params
| Some (colon, type_expr) ->
print_token colon ":";
print_type_expr type_expr);
print_token arrow "->";
print_expr body
and print_conditional {value; _} =
let open Region in
let {kwd_if; test; kwd_then; ifso; kwd_else; ifnot} = value
in print_token ghost "(";
print_token kwd_if "if";
print_expr test;
print_token kwd_then "then";
print_expr ifso;
print_token kwd_else "else";
print_expr ifnot;
print_token ghost ")"
let rec unpar = function let rec unpar = function
EPar {value={inside=expr;_}; _} -> unpar expr EPar {value={inside=expr;_}; _} -> unpar expr
| e -> e | e -> e

View File

@ -1,6 +1,8 @@
(* Abstract Syntax Tree (AST) for Ligodity *)
[@@@warning "-30"] [@@@warning "-30"]
(* Abstract Syntax Tree (AST) for Mini-ML *) open Utils
(* Regions (* Regions
@ -15,6 +17,9 @@
type 'a reg = 'a Region.reg type 'a reg = 'a Region.reg
val last : ('a -> Region.t) -> 'a list -> Region.t
val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t
(* Some keywords of OCaml *) (* Some keywords of OCaml *)
type keyword = Region.t type keyword = Region.t
@ -457,16 +462,6 @@ val norm : ?reg:(Region.t * kwd_fun) -> pattern Utils.nseq -> sep -> expr -> fun
let f l = let n = l in n let f l = let n = l in n
*) *)
(* Printing the tokens reconstructed from the AST. This is very useful
for debugging, as the output of [print_token ast] can be textually
compared to that of [Lexer.trace] (see module [LexerMain]). The
optional parameter [undo] is bound to [true] if the caller wants
the AST to be unparsed before printing (those nodes that have been
normalised with function [norm_let] and [norm_fun]). *)
val print_tokens : (*?undo:bool ->*) ast -> unit
(* Projecting regions from sundry nodes of the AST. See the first (* Projecting regions from sundry nodes of the AST. See the first
comment at the beginning of this file. *) comment at the beginning of this file. *)
@ -481,9 +476,3 @@ val type_expr_to_region : type_expr -> Region.t
contains. *) contains. *)
val unpar : expr -> expr val unpar : expr -> expr
(* TODO *)
val print_projection : projection -> unit
val print_pattern : pattern -> unit
val print_expr : expr -> unit

View File

@ -1,123 +0,0 @@
(* Parsing the command-line option for CameLIGO *)
type options = {
input : string option;
libs : string list;
verbose : Utils.String.Set.t
}
let abort msg =
Utils.highlight (Printf.sprintf "Command-line error: %s" msg); exit 1
let printf = Printf.printf
let sprintf = Printf.sprintf
let print = print_endline
(* Help *)
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.mligo | \"-\"]\n" file;
print "where <input>.mligo is the CameLIGO source file (default: stdin),";
print "and each <option> (if any) is one of the following:";
print " -I <paths> Library paths (colon-separated)";
print " --verbose=<phases> Colon-separated phases: cmdline, lexer, parser";
print " --version Send short commit hash to stdout";
print " -h, --help This help";
exit 0
(* Version *)
let version () = printf "%s\n" Version.version; exit 0
(* Specifying the command-line options a la GNU *)
let input = ref None
and verbose = ref Utils.String.Set.empty
and libs = ref []
and verb_str = ref ""
let split_at_colon = Str.(split (regexp ":"))
let add_path p = libs := !libs @ split_at_colon p
let add_verbose d =
verbose := List.fold_left (Utils.swap Utils.String.Set.add)
!verbose
(split_at_colon d)
let specs =
let open! Getopt in [
'I', nolong, None, Some add_path;
noshort, "verbose", None, Some add_verbose;
'h', "help", Some help, None;
noshort, "version", Some version, None
]
;;
(* Handler of anonymous arguments *)
let anonymous arg =
match !input with
None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs")
(* Checking options *)
let string_of convert = function
None -> "None"
| Some s -> sprintf "Some %s" (convert s)
let string_of_path p =
let apply s a = if a = "" then s else s ^ ":" ^ a
in List.fold_right apply p ""
let quote s = Printf.sprintf "\"%s\"" s
let print_opt () =
printf "COMMAND LINE\n";
printf "input = %s\n" (string_of quote !input);
printf "verbose = %s\n" !verb_str;
printf "libs = %s\n" (string_of_path !libs)
let check () =
let () =
if Utils.String.Set.mem "cmdline" !verbose then print_opt () in
let input =
match !input with
None | Some "-" -> !input
| Some file_path ->
if Filename.check_suffix file_path ".mligo"
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .mligo." in
(* Exporting remaining options as non-mutable values *)
let verbose = !verbose
and libs = !libs in
let () =
if Utils.String.Set.mem "cmdline" verbose then
begin
printf "\nEXPORTED COMMAND LINE\n";
printf "input = %s\n" (string_of quote input);
printf "verbose = %s\n" !verb_str;
printf "libs = %s\n" (string_of_path libs)
end
in {input; libs; verbose}
(* Parsing the command-line options *)
let read () =
try
Getopt.parse_cmdline specs anonymous;
(verb_str :=
let apply e a =
if a <> "" then Printf.sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose "");
check ()
with Getopt.Error msg -> abort msg

View File

@ -1,23 +0,0 @@
(* Command-line options for CameLIGO *)
(* The type [options] gathers the command-line options.
If the field [input] is [Some src], the name of the CameLIGO
source file, with the extension ".mligo", is [src]. If [input] is
[Some "-"] or [None], the source file is read from standard input.
The field [libs] is made of library paths (colon-separated).
The field [verbose] is a set of stages of the compiler chain,
about which more information may be displayed.
*)
type options = {
input : string option;
libs : string list;
verbose : Utils.String.Set.t;
}
(* Parsing the command-line options on stdin *)
val read : unit -> options

View File

@ -0,0 +1,168 @@
(* This signature defines the lexical tokens for LIGO
_Tokens_ are the abstract units which are used by the parser to
build the abstract syntax tree (AST), in other words, the stream of
tokens is the minimal model of the input program, carrying
implicitly all its structure in a linear encoding, and nothing
else, in particular, comments and whitespace are absent.
A _lexeme_ is a specific character string (concrete
representation) denoting a token (abstract representation). Tokens
can be thought of as sets, and lexemes as elements of those sets --
there is often an infinite number of lexemes, but a small number of
tokens. (Think of identifiers as lexemes and one token.)
The tokens are qualified here as being "lexical" because the
parser generator Menhir expects to define them, in which context
they are called "parsing tokens", and they are made to match each
other. (This is an idiosyncratic terminology.)
The type of the lexical tokens is the variant [t], also
aliased to [token].
*)
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
type lexeme = string
(* TOKENS *)
type t =
(* Symbols *)
ARROW of Region.t (* "->" *)
| CONS of Region.t (* "::" *)
| CAT of Region.t (* "^" *)
(*| APPEND (* "@" *)*)
(* Arithmetics *)
| MINUS of Region.t (* "-" *)
| PLUS of Region.t (* "+" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
(* Compounds *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
(* Separators *)
| COMMA of Region.t (* "," *)
| SEMI of Region.t (* ";" *)
| VBAR of Region.t (* "|" *)
| COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *)
(* Wildcard *)
| WILD of Region.t (* "_" *)
(* Comparisons *)
| EQ of Region.t (* "=" *)
| NE of Region.t (* "<>" *)
| LT of Region.t (* "<" *)
| GT of Region.t (* ">" *)
| LE of Region.t (* "=<" *)
| GE of Region.t (* ">=" *)
| BOOL_OR of Region.t (* "||" *)
| BOOL_AND of Region.t(* "&&" *)
(* Identifiers, labels, numbers and strings *)
| Ident of string Region.reg
| Constr of string Region.reg
| Int of (string * Z.t) Region.reg
| Nat of (string * Z.t) Region.reg
| Mtz of (string * Z.t) Region.reg
| Str of string Region.reg
| Bytes of (string * Hex.t) Region.reg
(* Keywords *)
(*| And*)
| Begin of Region.t
| Else of Region.t
| End of Region.t
| False of Region.t
| Fun of Region.t
| If of Region.t
| In of Region.t
| Let of Region.t
| Match of Region.t
| Mod of Region.t
| Not of Region.t
| Of of Region.t
| Or of Region.t
| Then of Region.t
| True of Region.t
| Type of Region.t
| With of Region.t
(* Liquidity specific *)
| LetEntry of Region.t
| MatchNat of Region.t
(*
| Contract
| Sig
| Struct
*)
(* Virtual tokens *)
| EOF of Region.t (* End of file *)
type token = t
(* Projections
The difference between extracting the lexeme and a string from a
token is that the latter is the textual representation of the OCaml
value denoting the token (its abstract syntax), rather than its
lexeme (concrete syntax).
*)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
(* Injections *)
type int_err =
Non_canonical_zero
type ident_err = Reserved_name
type invalid_natural =
| Invalid_natural
| Non_canonical_zero_nat
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result
val mk_mtz : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool

View File

@ -0,0 +1,536 @@
{
type lexeme = string
let sprintf = Printf.sprintf
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
module SMap = Utils.String.Map
module SSet = Utils.String.Set
(* TOKENS *)
type t =
(* Symbols *)
ARROW of Region.t (* "->" *)
| CONS of Region.t (* "::" *)
| CAT of Region.t (* "^" *)
(*| APPEND (* "@" *)*)
(* Arithmetics *)
| MINUS of Region.t (* "-" *)
| PLUS of Region.t (* "+" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
(* Compounds *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
(* Separators *)
| COMMA of Region.t (* "," *)
| SEMI of Region.t (* ";" *)
| VBAR of Region.t (* "|" *)
| COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *)
(* Wildcard *)
| WILD of Region.t (* "_" *)
(* Comparisons *)
| EQ of Region.t (* "=" *)
| NE of Region.t (* "<>" *)
| LT of Region.t (* "<" *)
| GT of Region.t (* ">" *)
| LE of Region.t (* "=<" *)
| GE of Region.t (* ">=" *)
| BOOL_OR of Region.t (* "||" *)
| BOOL_AND of Region.t (* "&&" *)
(* Identifiers, labels, numbers and strings *)
| Ident of string Region.reg
| Constr of string Region.reg
| Int of (string * Z.t) Region.reg
| Nat of (string * Z.t) Region.reg
| Mtz of (string * Z.t) Region.reg
| Str of string Region.reg
| Bytes of (string * Hex.t) Region.reg
(* Keywords *)
(*| And*)
| Begin of Region.t
| Else of Region.t
| End of Region.t
| False of Region.t
| Fun of Region.t
| If of Region.t
| In of Region.t
| Let of Region.t
| Match of Region.t
| Mod of Region.t
| Not of Region.t
| Of of Region.t
| Or of Region.t
| Then of Region.t
| True of Region.t
| Type of Region.t
| With of Region.t
(* Liquidity specific *)
| LetEntry of Region.t
| MatchNat of Region.t
(*
| Contract
| Sig
| Struct
*)
(* Virtual tokens *)
| EOF of Region.t (* End of file *)
type token = t
let proj_token = function
| ARROW region -> region, "ARROW"
| CONS region -> region, "CONS"
| CAT region -> region, "CAT"
| MINUS region -> region, "MINUS"
| PLUS region -> region, "PLUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| COMMA region -> region, "COMMA"
| SEMI region -> region, "SEMI"
| VBAR region -> region, "VBAR"
| COLON region -> region, "COLON"
| DOT region -> region, "DOT"
| WILD region -> region, "WILD"
| EQ region -> region, "EQ"
| NE region -> region, "NE"
| LT region -> region, "LT"
| GT region -> region, "GT"
| LE region -> region, "LE"
| GE region -> region, "GE"
| BOOL_OR region -> region, "BOOL_OR"
| BOOL_AND region -> region, "BOOL_AND"
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mtz Region.{region; value = s,n} ->
region, sprintf "Mtz (\"%s\", %s)" s (Z.to_string n)
| Str Region.{region; value} ->
region, sprintf "Str %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.to_string b)
| Begin region -> region, "Begin"
| Else region -> region, "Else"
| End region -> region, "End"
| False region -> region, "False"
| Fun region -> region, "Fun"
| If region -> region, "If"
| In region -> region, "In"
| Let region -> region, "Let"
| Match region -> region, "Match"
| Mod region -> region, "Mod"
| Not region -> region, "Not"
| Of region -> region, "Of"
| Or region -> region, "Or"
| Then region -> region, "Then"
| True region -> region, "True"
| Type region -> region, "Type"
| With region -> region, "With"
| LetEntry region -> region, "LetEntry"
| MatchNat region -> region, "MatchNat"
| EOF region -> region, "EOF"
let to_lexeme = function
| ARROW _ -> "->"
| CONS _ -> "::"
| CAT _ -> "^"
| MINUS _ -> "-"
| PLUS _ -> "+"
| SLASH _ -> "/"
| TIMES _ -> "*"
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| COMMA _ -> ","
| SEMI _ -> ";"
| VBAR _ -> "|"
| COLON _ -> ":"
| DOT _ -> "."
| WILD _ -> "_"
| EQ _ -> "="
| NE _ -> "<>"
| LT _ -> "<"
| GT _ -> ">"
| LE _ -> "=<"
| GE _ -> ">="
| BOOL_OR _ -> "||"
| BOOL_AND _ -> "&&"
| Ident id -> id.Region.value
| Constr id -> id.Region.value
| Int i
| Nat i
| Mtz i -> fst i.Region.value
| Str s -> s.Region.value
| Bytes b -> fst b.Region.value
| Begin _ -> "begin"
| Else _ -> "else"
| End _ -> "end"
| False _ -> "false"
| Fun _ -> "fun"
| If _ -> "if"
| In _ -> "in"
| Let _ -> "let"
| Match _ -> "match"
| Mod _ -> "mod"
| Not _ -> "not"
| Of _ -> "of"
| Or _ -> "or"
| True _ -> "true"
| Type _ -> "type"
| Then _ -> "then"
| With _ -> "with"
| LetEntry _ -> "let%entry"
| MatchNat _ -> "match%nat"
| EOF _ -> ""
let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str
let to_region token = proj_token token |> fst
(* Injections *)
type int_err =
Non_canonical_zero
(* LEXIS *)
let keywords = [
(fun reg -> Begin reg);
(fun reg -> Else reg);
(fun reg -> End reg);
(fun reg -> False reg);
(fun reg -> Fun reg);
(fun reg -> If reg);
(fun reg -> In reg);
(fun reg -> Let reg);
(fun reg -> Match reg);
(fun reg -> Mod reg);
(fun reg -> Not reg);
(fun reg -> Of reg);
(fun reg -> Or reg);
(fun reg -> Then reg);
(fun reg -> True reg);
(fun reg -> Type reg);
(fun reg -> With reg);
(fun reg -> LetEntry reg);
(fun reg -> MatchNat reg);
]
let reserved =
let open SSet in
empty
|> add "and"
|> add "as"
|> add "asr"
|> add "class"
|> add "constraint"
|> add "do"
|> add "done"
|> add "downto"
|> add "exception"
|> add "external"
|> add "for"
|> add "function"
|> add "functor"
|> add "inherit"
|> add "initializer"
|> add "land"
|> add "lazy"
|> add "lor"
|> add "lsl"
|> add "lsr"
|> add "lxor"
|> add "method"
|> add "module"
|> add "mutable"
|> add "new"
|> add "nonrec"
|> add "object"
|> add "open"
|> add "private"
|> add "rec"
|> add "sig"
|> add "struct"
|> add "to"
|> add "try"
|> add "val"
|> add "virtual"
|> add "when"
|> add "while"
let constructors = [
(fun reg -> False reg);
(fun reg -> True reg);
]
let add map (key, value) = SMap.add key value map
let mk_map mk_key list =
let apply map value = add map (mk_key value, value)
in List.fold_left apply SMap.empty list
type lexis = {
kwd : (Region.t -> token) SMap.t;
cstr : (Region.t -> token) SMap.t;
res : SSet.t
}
let lexicon : lexis =
let build list = mk_map (fun f -> to_lexeme (f Region.ghost)) list
in {kwd = build keywords;
cstr = build constructors;
res = reserved}
type ident_err = Reserved_name
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let digit = ['0'-'9']
let ident = small (letter | '_' | digit | '%')*
let constr = capital (letter | '_' | digit)*
(* Rules *)
rule scan_ident region lexicon = parse
(ident as value) eof {
if SSet.mem value lexicon.res
then Error Reserved_name
else Ok (match SMap.find_opt value lexicon.kwd with
Some mk_kwd -> mk_kwd region
| None -> Ident Region.{region; value}) }
and scan_constr region lexicon = parse
(constr as value) eof {
match SMap.find_opt value lexicon.cstr with
Some mk_cstr -> mk_cstr region
| None -> Constr Region.{region; value} }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Smart constructors (injections) *)
let mk_string lexeme region = Str Region.{region; value=lexeme}
let mk_bytes lexeme region =
let norm = Str.(global_replace (regexp "_") "" lexeme) in
let value = lexeme, Hex.of_string norm
in Bytes Region.{region; value}
let mk_int lexeme region =
let z = Str.(global_replace (regexp "_") "" lexeme)
|> Z.of_string in
if Z.equal z Z.zero && lexeme <> "0"
then Error Non_canonical_zero
else Ok (Int Region.{region; value = lexeme, z})
type invalid_natural =
| Invalid_natural
| Non_canonical_zero_nat
let mk_nat lexeme region =
match (String.index_opt lexeme 'p') with
| None -> Error Invalid_natural
| Some _ -> (
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "p") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0p"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme, z})
)
let mk_mtz lexeme region =
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mtz") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mtz"
then Error Non_canonical_zero
else Ok (Mtz Region.{region; value = lexeme, z})
let eof region = EOF region
let mk_sym lexeme region =
match lexeme with
"->" -> ARROW region
| "::" -> CONS region
| "^" -> CAT region
| "-" -> MINUS region
| "+" -> PLUS region
| "/" -> SLASH region
| "*" -> TIMES region
| "[" -> LBRACKET region
| "]" -> RBRACKET region
| "{" -> LBRACE region
| "}" -> RBRACE region
| "," -> COMMA region
| ";" -> SEMI region
| "|" -> VBAR region
| ":" -> COLON region
| "." -> DOT region
| "_" -> WILD region
| "=" -> EQ region
| "<>" -> NE region
| "<" -> LT region
| ">" -> GT region
| "=<" -> LE region
| ">=" -> GE region
| "||" -> BOOL_OR region
| "&&" -> BOOL_AND region
| "(" -> LPAR region
| ")" -> RPAR region
| a -> failwith ("Not understood token: " ^ a)
(* Identifiers *)
let mk_ident' lexeme region lexicon =
Lexing.from_string lexeme |> scan_ident region lexicon
let mk_ident lexeme region = mk_ident' lexeme region lexicon
(* Constructors *)
let mk_constr' lexeme region lexicon =
Lexing.from_string lexeme |> scan_constr region lexicon
let mk_constr lexeme region = mk_constr' lexeme region lexicon
(* Predicates *)
let is_string = function
Str _ -> true
| _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
| Begin _
| Else _
| End _
| False _
| Fun _
| If _
| In _
| Let _
| Match _
| Mod _
| Not _
| Of _
| Or _
| Then _
| True _
| Type _
| LetEntry _
| MatchNat _
| With _ -> true
| _ -> false
let is_constr = function
| Constr _
| Ident _
| False _
| True _ -> true
| _ -> false
let is_sym = function
| ARROW _
| CONS _
| CAT _
| MINUS _
| PLUS _
| SLASH _
| TIMES _
| LPAR _
| RPAR _
| LBRACKET _
| RBRACKET _
| LBRACE _
| RBRACE _
| COMMA _
| SEMI _
| VBAR _
| COLON _
| DOT _
| WILD _
| EQ _
| NE _
| LT _
| GT _
| LE _
| GE _
| BOOL_OR _
| BOOL_AND _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false
(* END TRAILER *)
}

View File

@ -1,47 +0,0 @@
(* Simple lexer for the Mini-ML language *)
(* Error reporting *)
type message = string
exception Error of message Region.reg
(* Tokeniser *)
(* The call [get_token ~log] evaluates in a lexer (a.k.a
tokeniser or scanner) whose type is [Lexing.lexbuf -> Token.t].
The argument [log] is a logger. As its type shows and suggests,
it is a pair made of an output channel and a printer for
tokens. The lexer would use any logger to print the recognised
tokens to the given channel. If no logger is given to [get_token],
no printing takes place while the lexer runs.
The call [reset ~file ~line ~offset buffer] modifies in-place the
lexing buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], the current line is
[line] and the offset on that line is [offset]. This function is
useful when lexing a file that has been previously preprocessed by
the C preprocessor, in which case the argument [file] is the name
of the file that was preprocessed, _not_ the preprocessed file (of
which the user is not normally aware). By default, the [line]
argument is [1].
*)
type logger = out_channel * (out_channel -> Token.t -> unit)
val get_token : ?log:logger -> Lexing.lexbuf -> Token.t
val reset : ?file:string -> ?line:int -> ?offset:int -> Lexing.lexbuf -> unit
val reset_file : file:string -> Lexing.lexbuf -> unit
(* Debugging *)
type file_path = string
val iter :
(Lexing.lexbuf -> out_channel -> Token.t -> unit) -> file_path option -> unit
val trace : file_path option -> unit
val prerr : kind:string -> message Region.reg -> unit
val format_error : kind:string -> message Region.reg -> string
val output_token : Lexing.lexbuf -> out_channel -> Token.t -> unit

View File

@ -1,454 +0,0 @@
(* Lexer specification for Mini-ML, to be processed by [ocamllex]. *)
{
(* START HEADER *)
(* UTILITIES *)
let sprintf = Printf.sprintf
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
module SMap = Utils.String.Map
(* Making a natural from its decimal notation (for Tez) *)
let format_tz s =
match String.index s '.' with
index ->
let len = String.length s in
let integral = Str.first_chars s index
and fractional = Str.last_chars s (len-index-1) in
let num = Z.of_string (integral ^ fractional)
and den = Z.of_string ("1" ^ String.make (len-index-1) '0')
and million = Q.of_string "1000000" in
let mtz = Q.make num den |> Q.mul million in
let should_be_1 = Q.den mtz in
if Z.equal Z.one should_be_1 then Some (Q.num mtz) else None
| exception Not_found -> assert false
(* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length
[len] containing the [len] characters in the list [p], in reverse
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
let mk_str (len: int) (p: char list) : string =
let bytes = Bytes.make len ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (len-1) p |> Bytes.to_string
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s)
type thread = {
opening : Region.t;
len : int;
acc : char list
}
let push_char char {opening; len; acc} =
{opening; len=len+1; acc=char::acc}
let push_string str {opening; len; acc} =
{opening;
len = len + String.length str;
acc = explode str acc}
(* LEXICAL ERRORS *)
type message = string
exception Error of message Region.reg
let error lexbuf msg =
let start = Pos.from_byte (Lexing.lexeme_start_p lexbuf)
and stop = Pos.from_byte (Lexing.lexeme_end_p lexbuf) in
let region = Region.make ~start ~stop
in raise (Error Region.{region; value=msg})
let fail region value = raise (Error Region.{region; value})
(* KEYWORDS *)
let keywords = Token.[
"begin", Some Begin;
"else", Some Else;
"false", Some False;
"fun", Some Fun;
"if", Some If;
"in", Some In;
"end", Some End;
"let", Some Let;
"match", Some Match;
"mod", Some Mod;
"not", Some Not;
"of", Some Of;
"or", Some Or;
"then", Some Then;
"true", Some True;
"type", Some Type;
"with", Some With;
(* Reserved *)
"and", None;
"as", None;
"asr", None;
(* "assert", None;*)
"class", None;
"constraint", None;
"do", None;
"done", None;
"downto", None;
"exception", None;
"external", None;
"for", None;
"function", None;
"functor", None;
"include", None;
"inherit", None;
"initializer", None;
"land", None;
"lazy", None;
"lor", None;
"lsl", None;
"lsr", None;
"lxor", None;
"method", None;
"module", None;
"mutable", None;
"new", None;
"nonrec", None;
"object", None;
"open", None;
"private", None;
"rec", None;
"sig", None;
"struct", None;
"to", None;
"try", None;
"val", None;
"virtual", None;
"when", None;
"while", None
]
let add map (key,value) = SMap.add key value map
let kwd_map = List.fold_left add SMap.empty keywords
(* LEXER ENGINE *)
(* Resetting file name and line number (according to #line directives) *)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line ~line buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset_offset ~offset buffer =
assert (offset >= 0);
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset }
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
(* Hack to roll back one lexeme in the current semantic action *)
(*
let rollback lexbuf =
let open Lexing in
let len = String.length (lexeme lexbuf) in
lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - len;
lexbuf.lex_curr_p <-
{lexbuf.lex_curr_p with pos_cnum = lexbuf.lex_curr_p.pos_cnum - len}
*)
(* REGIONS *)
let mk_region start stop =
let start = Pos.from_byte start
and stop = Pos.from_byte stop
in Region.make ~start ~stop
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Auxiliary regular expressions *)
let nl = ['\n' '\r']
let blank = [' ' '\t']
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural
let decimal = digit+ '.' digit+
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let ichar = letter | digit | ['_' '\'']
let ident = small ichar* | '_' ichar+
let uident = capital ichar*
let tparam = "'" ident (* Type parameters. Unused yet *)
let hexa = digit | ['A'-'F']
let byte = hexa hexa
let byte_seq = byte | byte (byte | '_')* byte
let bytes = "0x" (byte_seq? as seq)
let esc = "\\n" | "\\\\" | "\\b" | "\\r" | "\\t"
let schar = [^'"' '\\'] # nl (* TODO: Test *)
| "\\\"" | esc | "\\x" byte | "\\0" digit digit
let string = '"' schar* '"'
let char_set = [^'\'' '\\'] # nl (* TODO: Test *)
| "\\'" | esc | "\\x" byte | "\\0" digit digit
let char = "'" char_set "'"
(* Rules *)
rule scan = parse
nl { Lexing.new_line lexbuf; scan lexbuf }
| blank+ { scan lexbuf }
| "->" { Token.ARROW }
| "::" { Token.CONS }
| "^" { Token.CAT }
(*| "@" { Token.APPEND }*)
| "=" { Token.EQ }
| "<>" { Token.NE }
| "<" { Token.LT }
| ">" { Token.GT }
| "<=" { Token.LE }
| ">=" { Token.GE }
| "&&" { Token.BOOL_AND }
| "||" { Token.BOOL_OR }
| "-" { Token.MINUS }
| "+" { Token.PLUS }
| "/" { Token.SLASH }
| "*" { Token.TIMES }
| "(" { Token.LPAR }
| ")" { Token.RPAR }
| "[" { Token.LBRACKET }
| "]" { Token.RBRACKET }
| "{" { Token.LBRACE }
| "}" { Token.RBRACE }
| "," { Token.COMMA }
| ";" { Token.SEMI }
| ":" { Token.COLON }
| "|" { Token.VBAR }
| "." { Token.DOT }
| "_" { Token.WILD }
| eof { Token.EOF }
| integer as n { Token.Int (n, Z.of_string n) }
| integer as n "p" { Token.Nat (n ^ "p", Z.of_string n) }
| integer as tz "tz" { Token.Mtz (tz ^ "tz", Z.mul (Z.of_int 1_000_000) (Z.of_string tz)) }
| decimal as tz "tz" {
match format_tz tz with
Some z -> Token.Mtz (tz ^ "tz", z)
| None -> sprintf "Invalid tez amount." |> error lexbuf
}
| uident as id { Token.Constr id }
| bytes {
let norm = Str.(global_replace (regexp "_") "" seq)
in Token.Bytes (seq, Hex.of_string norm)
}
| "let%init" { Token.Let }
| "let%entry" { Token.LetEntry }
| "match%nat" { Token.MatchNat }
| ident as id {
match SMap.find id kwd_map with
None -> sprintf "Reserved name \"%s\"." id |> error lexbuf
| Some kwd -> kwd
| exception Not_found -> Token.Ident id }
| '"' { let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let opening = mk_region start stop in
let thread = {opening; len=1; acc=['"']} in
let thread = scan_string thread lexbuf in
let lexeme = mk_str thread.len thread.acc in
let () = lexbuf.Lexing.lex_start_p <- start
in Token.Str lexeme }
| "(*" { let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let opening = mk_region start stop in
let thread = {opening; len=2; acc=['*';'(']} in
let thread = scan_block thread lexbuf in
let () = ignore thread
in scan lexbuf }
(* Management of #include CPP directives
An input LIGO program may contain GNU CPP (C preprocessor)
directives, and the entry modules (named *Main.ml) run CPP on them
in traditional mode:
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
The main interest in using CPP is that it can stand for a poor
man's (flat) module system for LIGO thanks to #include
directives, and the traditional mode leaves the markup mostly
undisturbed.
Some of the #line resulting from processing #include directives
deal with system file headers and thus have to be ignored for our
purpose. Moreover, these #line directives may also carry some
additional flags:
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
of which 1 and 2 indicate, respectively, the start of a new file
and the return from a file (after its inclusion has been
processed).
*)
| '#' blank* ("line" blank+)? (integer as line) blank+
'"' (string as file) '"' {
let flags = scan_flags [] lexbuf in
let () = ignore flags in
let line = int_of_string line
and file = Filename.basename file in
let () = reset ~file ~line ~offset:0 lexbuf
in scan lexbuf
}
| _ as c { let msg = sprintf "Invalid character '%s'."
(Char.escaped c)
in error lexbuf msg }
(* Scanning CPP #include flags *)
and scan_flags acc = parse
blank+ { scan_flags acc lexbuf }
| integer as code { let acc = int_of_string code :: acc
in scan_flags acc lexbuf }
| nl { Lexing.new_line lexbuf; List.rev acc }
| eof { List.rev acc }
(* Finishing a string *)
and scan_string thread = parse
nl { fail thread.opening "Broken string." }
| eof { fail thread.opening "Unterminated string." }
| '"' { push_char '"' thread }
| esc as s { scan_string (push_string s thread) lexbuf }
| '\\' _ { let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let region = mk_region start stop
in fail region "Undefined escape sequence." }
| _ as c { scan_string (push_char c thread) lexbuf }
(* Comments *)
and scan_block thread = parse
'"' | "(*" {
let opening = thread.opening in
let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let opening' = mk_region start stop in
let lexeme = Lexing.lexeme lexbuf in
let thread = push_string lexeme thread in
let thread = {thread with opening=opening'} in
let next = if lexeme = "\"" then scan_string
else scan_block in
let thread = next thread lexbuf in
let thread = {thread with opening}
in scan_block thread lexbuf }
| "*)" { push_string (Lexing.lexeme lexbuf) thread }
| nl { Lexing.new_line lexbuf; scan_block thread lexbuf }
| eof { fail thread.opening "Open comment." }
| _ as c { scan_block (push_char c thread) lexbuf }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
type logger = out_channel * (out_channel -> Token.t -> unit)
let get_token ?log =
match log with
None -> scan
| Some (out_chan, print) ->
let print = print out_chan in
fun buffer -> let t = scan buffer in print t; flush out_chan; t
(* Standalone lexer for debugging purposes *)
(* TODO: Move out (functor). See LIGO. *)
let format_error ~(kind: string) Region.{region; value=msg} =
sprintf "%s error %s:\n%s%!"
kind (region#to_string `Byte) msg
let prerr ~(kind: string) msg =
Utils.highlight (format_error ~kind msg)
type file_path = string
let output_token buffer chan token =
let open Lexing in
let conc = Token.to_string token in
let start = Pos.from_byte buffer.lex_start_p
and stop = Pos.from_byte buffer.lex_curr_p in
Printf.fprintf chan "%s-%s: %s\n%!"
(start#compact `Byte) (stop#compact `Byte) conc
let iter action file_opt =
try
let cin, reset =
match file_opt with
None | Some "-" -> stdin, ignore
| Some file -> open_in file, reset_file ~file in
let buffer = Lexing.from_channel cin in
let rec iter () =
try
let t = scan buffer in
action buffer stdout t;
if t = Token.EOF then (close_in cin; close_out stdout)
else iter ()
with Error diag ->
close_in cin; close_out stdout;
prerr ~kind:"Lexical" diag
in reset buffer; iter ()
with Sys_error msg -> Utils.highlight msg
let trace = iter output_token
(* END TRAILER *)
}

View File

@ -1,4 +1,4 @@
(* Driver for the lexer of CameLIGO *) (* Driver for the lexer of Ligodity *)
(* Error printing and exception tracing *) (* Error printing and exception tracing *)
@ -6,11 +6,56 @@ let () = Printexc.record_backtrace true
(* Running the lexer on the source *) (* Running the lexer on the source *)
let options = EvalOpt.read () let options = EvalOpt.read "Ligodity" ".mligo"
open EvalOpt;; open EvalOpt
if Utils.String.Set.mem "lexer" options.verbose then let external_ text =
Lexer.trace options.input Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
else Lexer.iter (fun _lexbuf _out _token -> ()) options.input
;; (* Path for CPP inclusions (#include) *)
let lib_path =
match options.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
(* Preprocessing the input source and opening the input channels *)
let prefix =
match options.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.mligo"
let pp_input =
if Utils.String.Set.mem "cpp" options.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match options.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - > %s"
lib_path pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp%s %s > %s"
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" options.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Running the lexer on the input file *)
module Lexer = Lexer.Make (LexToken)
module Log = LexerLog.Make (Lexer)
let () = Log.trace ~offsets:options.offsets
options.mode (Some pp_input) options.cmd

View File

@ -1,75 +1,70 @@
%{ %{
%} %}
(* Tokens (mirroring those defined in module Token) *) %token <Region.t> MINUS
%token <Region.t> PLUS
%token <Region.t> SLASH
%token <Region.t> TIMES
%token MINUS %token <Region.t> LPAR
%token PLUS %token <Region.t> RPAR
%token SLASH %token <Region.t> LBRACKET
%token TIMES %token <Region.t> RBRACKET
%token <Region.t> LBRACE
%token <Region.t> RBRACE
%token LPAR %token <Region.t> ARROW
%token RPAR %token <Region.t> CONS
%token LBRACKET %token <Region.t> CAT
%token RBRACKET
%token LBRACE
%token RBRACE
%token ARROW
%token CONS
%token CAT
(*%token APPEND*) (*%token APPEND*)
%token DOT %token <Region.t> DOT
%token COMMA %token <Region.t> COMMA
%token SEMI %token <Region.t> SEMI
%token COLON %token <Region.t> COLON
%token VBAR %token <Region.t> VBAR
%token WILD %token <Region.t> WILD
%token EQ %token <Region.t> EQ
%token NE %token <Region.t> NE
%token LT %token <Region.t> LT
%token GT %token <Region.t> GT
%token LE %token <Region.t> LE
%token GE %token <Region.t> GE
%token BOOL_OR %token <Region.t> BOOL_OR
%token BOOL_AND %token <Region.t> BOOL_AND
%token <string> Ident %token <string Region.reg> Ident
%token <string> Constr %token <string Region.reg> Constr
%token <string> Str %token <string Region.reg> Str
%token <string * Z.t> Int %token <(string * Z.t) Region.reg> Int
%token <string * Z.t> Mtz %token <(string * Z.t) Region.reg> Nat
%token <string * Z.t> Nat %token <(string * Z.t) Region.reg> Mtz
(*%token And*) (*%token And*)
%token Begin %token <Region.t> Begin
%token Else %token <Region.t> Else
%token End %token <Region.t> End
%token False %token <Region.t> False
%token Fun %token <Region.t> Fun
%token If %token <Region.t> If
%token In %token <Region.t> In
%token Let %token <Region.t> Let
%token List %token <Region.t> Match
%token Map %token <Region.t> Mod
%token Match %token <Region.t> Not
%token Mod %token <Region.t> Of
%token Not %token <Region.t> Or
%token Of %token <Region.t> Then
%token Or %token <Region.t> True
%token Set %token <Region.t> Type
%token Then %token <Region.t> With
%token True %token <Region.t> LetEntry
%token Type %token <Region.t> MatchNat
%token With
%token LetEntry
%token MatchNat
%token EOF %token <Region.t> EOF
%% %%

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,363 @@
open AST
open! Region
(* Printing the tokens with their source locations *)
let sprintf = Printf.sprintf
let offsets = ref true
let mode = ref `Point
let compact (region: Region.t) =
region#compact ~offsets:!offsets !mode
let print_nsepseq sep print (head,tail) =
let print_aux ((sep_reg:Region.t), item) =
Printf.printf "%s: %s\n" (compact sep_reg) sep;
print item
in print head; List.iter print_aux tail
let print_sepseq sep print = function
None -> ()
| Some seq -> print_nsepseq sep print seq
let print_csv print = print_nsepseq "," print
let print_token (reg: Region.t) conc =
Printf.printf "%s: %s\n" (compact reg) conc
let print_var Region.{region; value} =
Printf.printf "%s: Ident %s\n" (compact region) value
let print_uident Region.{region; value} =
Printf.printf "%s: Uident %s\n" (compact region) value
let print_str Region.{region; value} =
Printf.printf "%s: Str \"%s\"\n" (compact region) value
let print_bytes Region.{region; value=lexeme, abstract} =
Printf.printf "%s: Bytes (\"%s\", \"0x%s\")\n"
(compact region) lexeme (Hex.to_string abstract)
let rec print_tokens {decl;eof} =
Utils.nseq_iter print_statement decl; print_token eof "EOF"
and print_statement = function
Let {value=kwd_let, let_binding; _} ->
print_token kwd_let "let";
print_let_binding let_binding
| LetEntry {value=kwd_let_entry, let_binding; _} ->
print_token kwd_let_entry "let%entry";
print_let_binding let_binding
| TypeDecl {value={kwd_type; name; eq; type_expr}; _} ->
print_token kwd_type "type";
print_var name;
print_token eq "=";
print_type_expr type_expr
and print_type_expr = function
TProd prod -> print_cartesian prod
| TSum {value; _} -> print_nsepseq "|" print_variant value
| TRecord t -> print_record_type t
| TApp app -> print_type_app app
| TPar par -> print_type_par par
| TAlias var -> print_var var
| TFun t -> print_fun_type t
and print_fun_type {value; _} =
let domain, arrow, range = value in
print_type_expr domain;
print_token arrow "->";
print_type_expr range
and print_type_app {value; _} =
let type_constr, type_tuple = value in
print_type_tuple type_tuple;
print_var type_constr
and print_type_tuple {value; _} =
let {lpar; inside; rpar} = value in
print_token lpar "(";
print_nsepseq "," print_type_expr inside;
print_token rpar ")"
and print_type_par {value={lpar;inside=t;rpar}; _} =
print_token lpar "(";
print_type_expr t;
print_token rpar ")"
and print_projection node =
let {struct_name; selector; field_path} = node in
print_var struct_name;
print_token selector ".";
print_nsepseq "." print_selection field_path
and print_selection = function
FieldName id -> print_var id
| Component {value; _} ->
let {lpar; inside; rpar} = value in
let Region.{value=lexeme,z; region} = inside in
print_token lpar "(";
print_token region
(sprintf "Int %s (%s)" lexeme (Z.to_string z));
print_token rpar ")"
and print_cartesian Region.{value;_} =
print_nsepseq "*" print_type_expr value
and print_variant {value = {constr; args}; _} =
print_uident constr;
match args with
None -> ()
| Some (kwd_of, cartesian) ->
print_token kwd_of "of";
print_cartesian cartesian
and print_record_type record_type =
print_injection print_field_decl record_type
and print_field_decl {value; _} =
let {field_name; colon; field_type} = value
in print_var field_name;
print_token colon ":";
print_type_expr field_type
and print_injection :
'a.('a -> unit) -> 'a injection reg -> unit =
fun print {value; _} ->
let {opening; elements; terminator; closing} = value in
print_opening opening;
print_sepseq ";" print elements;
print_terminator terminator;
print_closing closing
and print_opening = function
Begin region -> print_token region "begin"
| With region -> print_token region "with"
| LBrace region -> print_token region "{"
| LBracket region -> print_token region "["
and print_closing = function
End region -> print_token region "end"
| RBrace region -> print_token region "}"
| RBracket region -> print_token region "]"
and print_terminator = function
Some semi -> print_token semi ";"
| None -> ()
and print_let_binding {bindings; lhs_type; eq; let_rhs} =
List.iter print_pattern bindings;
(match lhs_type with
None -> ()
| Some (colon, type_expr) ->
print_token colon ":";
print_type_expr type_expr);
(print_token eq "="; print_expr let_rhs)
and print_pattern = function
PTuple {value=patterns;_} -> print_csv print_pattern patterns
| PList p -> print_list_pattern p
| PVar {region; value} ->
Printf.printf "%s: PVar %s\n" (compact region) value
| PUnit {value=lpar,rpar; _} ->
print_token lpar "("; print_token rpar ")"
| PInt {region; value=lex,z} ->
print_token region (sprintf "PInt %s (%s)" lex (Z.to_string z))
| PTrue kwd_true -> print_token kwd_true "true"
| PFalse kwd_false -> print_token kwd_false "false"
| PString s -> print_str s
| PWild wild -> print_token wild "_"
| PPar {value={lpar;inside=p;rpar}; _} ->
print_token lpar "("; print_pattern p; print_token rpar ")"
| PConstr p -> print_constr_pattern p
| PRecord r -> print_record_pattern r
| PTyped t -> print_typed_pattern t
and print_list_pattern = function
Sugar p -> print_injection print_pattern p
| PCons p -> print_raw p
and print_raw {value=p1,c,p2; _} =
print_pattern p1; print_token c "::"; print_pattern p2
and print_typed_pattern {value; _} =
let {pattern; colon; type_expr} = value in
print_pattern pattern;
print_token colon ":";
print_type_expr type_expr
and print_record_pattern record_pattern =
print_injection print_field_pattern record_pattern
and print_field_pattern {value; _} =
let {field_name; eq; pattern} = value in
print_var field_name;
print_token eq "=";
print_pattern pattern
and print_constr_pattern {value=constr, p_opt; _} =
print_uident constr;
match p_opt with
None -> ()
| Some pattern -> print_pattern pattern
and print_expr = function
ELetIn {value;_} -> print_let_in value
| ECond cond -> print_conditional cond
| ETuple {value;_} -> print_csv print_expr value
| ECase {value;_} -> print_match_expr value
| EFun e -> print_fun_expr e
| EAnnot e -> print_annot_expr e
| ELogic e -> print_logic_expr e
| EArith e -> print_arith_expr e
| EString e -> print_string_expr e
| ECall {value=f,l; _} ->
print_expr f; Utils.nseq_iter print_expr l
| EVar v -> print_var v
| EProj p -> print_projection p.value
| EUnit {value=lpar,rpar; _} ->
print_token lpar "("; print_token rpar ")"
| EBytes b -> print_bytes b
| EPar {value={lpar;inside=e;rpar}; _} ->
print_token lpar "("; print_expr e; print_token rpar ")"
| EList e -> print_list_expr e
| ESeq seq -> print_sequence seq
| ERecord e -> print_record_expr e
| EConstr {value=constr,None; _} -> print_uident constr
| EConstr {value=(constr, Some arg); _} ->
print_uident constr; print_expr arg
and print_annot_expr {value=e,t; _} =
print_expr e;
print_token Region.ghost ":";
print_type_expr t
and print_list_expr = function
Cons {value={arg1;op;arg2}; _} ->
print_expr arg1;
print_token op "::";
print_expr arg2
| List e -> print_injection print_expr e
(*| Append {value=e1,append,e2; _} ->
print_expr e1;
print_token append "@";
print_expr e2 *)
and print_arith_expr = function
Add {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "+"; print_expr arg2
| Sub {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "-"; print_expr arg2
| Mult {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "*"; print_expr arg2
| Div {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "/"; print_expr arg2
| Mod {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "mod"; print_expr arg2
| Neg {value={op;arg}; _} -> print_token op "-"; print_expr arg
| Int {region; value=lex,z} ->
print_token region (sprintf "Int %s (%s)" lex (Z.to_string z))
| Mtz {region; value=lex,z} ->
print_token region (sprintf "Mtz %s (%s)" lex (Z.to_string z))
| Nat {region; value=lex,z} ->
print_token region (sprintf "Nat %s (%s)" lex (Z.to_string z))
and print_string_expr = function
Cat {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "^"; print_expr arg2
| String s -> print_str s
and print_logic_expr = function
BoolExpr e -> print_bool_expr e
| CompExpr e -> print_comp_expr e
and print_bool_expr = function
Or {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "||"; print_expr arg2
| And {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "&&"; print_expr arg2
| Not {value={op;arg}; _} -> print_token op "not"; print_expr arg
| True kwd_true -> print_token kwd_true "true"
| False kwd_false -> print_token kwd_false "false"
and print_comp_expr = function
Lt {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "<"; print_expr arg2
| Leq {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "<="; print_expr arg2
| Gt {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op ">"; print_expr arg2
| Geq {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op ">="; print_expr arg2
| Neq {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "<>"; print_expr arg2
| Equal {value={arg1;op;arg2}; _} ->
print_expr arg1; print_token op "="; print_expr arg2
and print_record_expr e =
print_injection print_field_assign e
and print_field_assign {value; _} =
let {field_name; assignment; field_expr} = value in
print_var field_name;
print_token assignment "=";
print_expr field_expr
and print_sequence seq = print_injection print_expr seq
and print_match_expr expr =
let {kwd_match; expr; opening;
lead_vbar; cases; closing} = expr in
print_token kwd_match "match";
print_expr expr;
print_opening opening;
print_token_opt lead_vbar "|";
print_cases cases;
print_closing closing
and print_token_opt = function
None -> fun _ -> ()
| Some region -> print_token region
and print_cases {value; _} =
print_nsepseq "|" print_case_clause value
and print_case_clause {value; _} =
let {pattern; arrow; rhs} = value in
print_pattern pattern;
print_token arrow "->";
print_expr rhs
and print_let_in (bind: let_in) =
let {kwd_let; binding; kwd_in; body} = bind in
print_token kwd_let "let";
print_let_binding binding;
print_token kwd_in "in";
print_expr body
and print_fun_expr {value; _} =
let {kwd_fun; params; p_annot; arrow; body} = value in
print_token kwd_fun "fun";
(match p_annot with
None -> List.iter print_pattern params
| Some (colon, type_expr) ->
print_token colon ":";
print_type_expr type_expr);
print_token arrow "->";
print_expr body
and print_conditional {value; _} =
let open Region in
let {kwd_if; test; kwd_then; ifso; kwd_else; ifnot} = value
in print_token ghost "(";
print_token kwd_if "if";
print_expr test;
print_token kwd_then "then";
print_expr ifso;
print_token kwd_else "else";
print_expr ifnot;
print_token ghost ")"

View File

@ -0,0 +1,22 @@
(* Printing the AST *)
val offsets : bool ref
val mode : [`Byte | `Point] ref
(* Printing the tokens reconstructed from the AST. This is very useful
for debugging, as the output of [print_token ast] can be textually
compared to that of [Lexer.trace] (see module [LexerMain]). The
optional parameter [undo] is bound to [true] if the caller wants
the AST to be unparsed before printing (those nodes that have been
normalised with function [norm_let] and [norm_fun]). *)
val print_tokens : AST.t -> unit
(* val print_path : AST.path -> unit *)
val print_pattern : AST.pattern -> unit
val print_expr : AST.expr -> unit
(* val print_instruction : AST.instruction -> unit *)
(* val print_projection : projection -> unit
val print_pattern : pattern -> unit
val print_expr : expr -> unit *)

View File

@ -1,4 +1,4 @@
(* Driver for the parser of CameLIGO *) (* Driver for the parser of Ligodity *)
(* Error printing and exception tracing *) (* Error printing and exception tracing *)
@ -6,11 +6,40 @@ let () = Printexc.record_backtrace true
(* Reading the command-line options *) (* Reading the command-line options *)
let options = EvalOpt.read () let options = EvalOpt.read "Ligodity" ".mligo"
open EvalOpt open EvalOpt
(* Path to the Mini-ML standard library *) (* Auxiliary functions *)
let sprintf = Printf.sprintf
(* Extracting the input file *)
let file =
match options.input with
None | Some "-" -> false
| Some _ -> true
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
type Error.t += ParseError
let error_to_string = function
ParseError -> "Syntax error.\n"
| _ -> assert false
let print_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg)
(* Path for CPP inclusions (#include) *)
let lib_path = let lib_path =
match options.libs with match options.libs with
@ -18,36 +47,76 @@ let lib_path =
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs "" in List.fold_right mk_I libs ""
(* Opening the input channel and setting the lexing engine *) (* Preprocessing the input source and opening the input channels *)
let cin, reset = let prefix =
match options.input with match options.input with
None | Some "-" -> stdin, ignore None | Some "-" -> "temp"
| Some file -> open_in file, Lexer.reset_file ~file | Some file -> Filename.(file |> basename |> remove_extension)
let buffer = Lexing.from_channel cin let suffix = ".pp.mligo"
let () = reset buffer
let pp_input =
if Utils.String.Set.mem "cpp" options.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match options.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - > %s"
lib_path pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp%s %s > %s"
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" options.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Instanciating the lexer *)
module Lexer = Lexer.Make (LexToken)
module Log = LexerLog.Make (Lexer)
let Lexer.{read; buffer; get_pos; get_last; close} =
Lexer.open_token_stream (Some pp_input)
and cout = stdout
let log = Log.output_token ~offsets:options.offsets
options.mode options.cmd cout
and close_all () = close (); close_out cout
(* Tokeniser *) (* Tokeniser *)
let tokeniser = let tokeniser = read ~log
if Utils.String.Set.mem "lexer" options.verbose then
Lexer.get_token ~log:(stdout, Lexer.output_token buffer) (* Main *)
else Lexer.get_token ?log:None
let () = let () =
try try
let ast = Parser.program tokeniser buffer in let ast = Parser.contract tokeniser buffer in
if Utils.String.Set.mem "parser" options.verbose if Utils.String.Set.mem "ast" options.verbose
then AST.print_tokens ast then begin
ParserLog.offsets := options.offsets;
ParserLog.mode := options.mode;
ParserLog.print_tokens ast
end
with with
Lexer.Error diag -> Lexer.Error err ->
close_in cin; Lexer.prerr ~kind:"Lexical" diag close_all ();
Lexer.print_error ~offsets:options.offsets
options.mode err ~file
| Parser.Error -> | Parser.Error ->
let start = Pos.from_byte (Lexing.lexeme_start_p buffer) let region = get_last () in
and stop = Pos.from_byte (Lexing.lexeme_end_p buffer) in let error = Region.{region; value=ParseError} in
let region = Region.make ~start ~stop in let () = close_all () in
close_in cin; print_error ~offsets:options.offsets
Lexer.prerr ~kind:"Syntactical" options.mode error ~file
Region.{value="Parse error."; region}
| Sys_error msg -> Utils.highlight msg | Sys_error msg -> Utils.highlight msg

View File

@ -1,145 +0,0 @@
(* Abstract lexical tokens for Mini-ML *)
type t =
(* Symbols *)
ARROW
| CONS
| CAT
(*| APPEND*)
| MINUS
| PLUS
| SLASH
| TIMES
| LPAR
| RPAR
| LBRACKET
| RBRACKET
| LBRACE
| RBRACE
| COMMA
| SEMI
| VBAR
| COLON
| DOT
| WILD
| EQ
| NE
| LT
| GT
| LE
| GE
| BOOL_OR
| BOOL_AND
(* Identifiers, numbers and strings *)
| Ident of string
| Constr of string
| Int of (string * Z.t)
| Nat of (string * Z.t)
| Mtz of (string * Z.t)
| Str of string
| Bytes of (string * Hex.t)
(* Keywords *)
(*| And*)
| Begin
| Else
| End
| False
| Fun
| If
| In
| Let
| List
| Map
| Match
| Mod
| Not
| Of
| Or
| Set
| Then
| True
| Type
| With
| LetEntry
| MatchNat
(*
| Contract
| Sig
| Struct
*)
(* Virtual tokens *)
| EOF (* End of file *)
type token = t
let sprintf = Printf.sprintf
let to_string = function
ARROW -> "->"
| CONS -> "::"
| CAT -> "^"
(*| APPEND -> "@"*)
| MINUS -> "-"
| PLUS -> "+"
| SLASH -> "/"
| TIMES -> "*"
| LPAR -> "("
| RPAR -> ")"
| LBRACKET -> "["
| RBRACKET -> "]"
| LBRACE -> "{"
| RBRACE -> "}"
| COMMA -> ","
| SEMI -> ";"
| VBAR -> "|"
| COLON -> ":"
| DOT -> "."
| WILD -> "_"
| EQ -> "="
| NE -> "<>"
| LT -> "<"
| GT -> ">"
| LE -> "<="
| GE -> ">="
| BOOL_OR -> "||"
| BOOL_AND -> "&&"
| Ident id -> sprintf "Ident %s" id
| Constr id -> sprintf "Constr %s" id
| Int (lex,z) -> sprintf "Int %s (%s)" lex (Z.to_string z)
| Nat (lex,z) -> sprintf "Nat %s (%s)" lex (Z.to_string z)
| Mtz (lex,z) -> sprintf "Mtz %s (%s)" lex (Z.to_string z)
| Str n -> sprintf "Str \"%s\"" n
| Bytes (lex,h) -> sprintf "Bytes %s (0x%s)" lex (Hex.to_string h)
(*| And -> "and"*)
| Begin -> "begin"
| Else -> "else"
| End -> "end"
| False -> "false"
| Fun -> "fun"
| If -> "if"
| In -> "in"
| Let -> "let"
| List -> "list"
| Map -> "map"
| Match -> "match"
| Mod -> "mod"
| Not -> "not"
| Of -> "of"
| Or -> "or"
| Set -> "set"
| Then -> "then"
| True -> "true"
| Type -> "type"
| With -> "with"
| LetEntry -> "let%entry"
| MatchNat -> "match%nat"
| EOF -> "EOF"

View File

@ -1,101 +0,0 @@
(* Lexical tokens for Mini-ML *)
type t =
(* Symbols *)
ARROW (* "->" *)
| CONS (* "::" *)
| CAT (* "^" *)
(*| APPEND (* "@" *)*)
(* Arithmetics *)
| MINUS (* "-" *)
| PLUS (* "+" *)
| SLASH (* "/" *)
| TIMES (* "*" *)
(* Compounds *)
| LPAR (* "(" *)
| RPAR (* ")" *)
| LBRACKET (* "[" *)
| RBRACKET (* "]" *)
| LBRACE (* "{" *)
| RBRACE (* "}" *)
(* Separators *)
| COMMA (* "," *)
| SEMI (* ";" *)
| VBAR (* "|" *)
| COLON (* ":" *)
| DOT (* "." *)
(* Wildcard *)
| WILD (* "_" *)
(* Comparisons *)
| EQ (* "=" *)
| NE (* "<>" *)
| LT (* "<" *)
| GT (* ">" *)
| LE (* "=<" *)
| GE (* ">=" *)
| BOOL_OR (* "||" *)
| BOOL_AND (* "&&" *)
(* Identifiers, labels, numbers and strings *)
| Ident of string
| Constr of string
| Int of (string * Z.t)
| Nat of (string * Z.t)
| Mtz of (string * Z.t)
| Str of string
| Bytes of (string * Hex.t)
(* Keywords *)
(*| And*)
| Begin
| Else
| End
| False
| Fun
| If
| In
| Let
| List
| Map
| Match
| Mod
| Not
| Of
| Or
| Set
| Then
| True
| Type
| With
(* Liquidity specific *)
| LetEntry
| MatchNat
(*
| Contract
| Sig
| Struct
*)
(* Virtual tokens *)
| EOF (* End of file *)
type token = t
val to_string: t -> string

View File

@ -1,154 +0,0 @@
(* Utility types and functions *)
(* Identity *)
let id x = x
(* Combinators *)
let (<@) f g x = f (g x)
let swap f x y = f y x
let lambda = fun x _ -> x
let curry f x y = f (x,y)
let uncurry f (x,y) = f x y
(* Parametric rules for sequences *)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
let nseq_cons x (hd,tl) = x, hd::tl
let nsepseq_cons x sep (hd,tl) = x, (sep,hd)::tl
let sepseq_cons x sep = function
None -> x, []
| Some (hd,tl) -> x, (sep,hd)::tl
(* Rightwards iterators *)
let nseq_foldl f a (hd,tl) = List.fold_left f a (hd::tl)
let nsepseq_foldl f a (hd,tl) =
List.fold_left (fun a (_,e) -> f a e) (f a hd) tl
let sepseq_foldl f a = function
None -> a
| Some s -> nsepseq_foldl f a s
let nseq_iter f (hd,tl) = List.iter f (hd::tl)
let nsepseq_iter f (hd,tl) = f hd; List.iter (f <@ snd) tl
let sepseq_iter f = function
None -> ()
| Some s -> nsepseq_iter f s
(* Reversing *)
let nseq_rev (hd,tl) =
let rec aux acc = function
[] -> acc
| x::l -> aux (nseq_cons x acc) l
in aux (hd,[]) tl
let nsepseq_rev =
let rec aux acc = function
hd, (sep,snd)::tl -> aux ((sep,hd)::acc) (snd,tl)
| hd, [] -> hd, acc in
function
hd, (sep,snd)::tl -> aux [sep,hd] (snd,tl)
| s -> s
let sepseq_rev = function
None -> None
| Some seq -> Some (nsepseq_rev seq)
(* Leftwards iterators *)
let nseq_foldr f (hd,tl) = List.fold_right f (hd::tl)
let nsepseq_foldr f (hd,tl) a = f hd (List.fold_right (f <@ snd) tl a)
let sepseq_foldr f = function
None -> fun a -> a
| Some s -> nsepseq_foldr f s
(* Conversions to lists *)
let nseq_to_list (x,y) = x::y
let nsepseq_to_list (x,y) = x :: List.map snd y
let sepseq_to_list = function
None -> []
| Some s -> nsepseq_to_list s
(* Optional values *)
module Option = struct
let apply f x =
match x with
Some y -> Some (f y)
| None -> None
let rev_apply x y =
match x with
Some f -> f y
| None -> y
let to_string = function
None -> ""
| Some x -> x
end
(* Modules based on [String], like sets and maps. *)
module String = struct
include String
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Integers *)
module Int = struct
type t = int
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Effectful symbol generator *)
let gen_sym =
let counter = ref 0 in
fun () -> incr counter; "#" ^ string_of_int !counter
(* General tracing function *)
let trace text = function
None -> ()
| Some chan -> output_string chan text; flush chan
(* Printing a string in red to standard error *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m\n%!" msg

View File

@ -1,97 +0,0 @@
(* Utility types and functions *)
(* Identity *)
val id : 'a -> 'a
(* Combinators *)
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
val lambda : 'a -> 'b -> 'a
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
(* Parametric rules for sequences
nseq: non-empty sequence;
sepseq: (possibly empty) sequence of separated items;
nsepseq: non-empty sequence of separated items.
*)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
val nseq_cons : 'a -> 'a nseq -> 'a nseq
val nsepseq_cons : 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_cons : 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
(* Reversing *)
val nseq_rev : 'a nseq -> 'a nseq
val nsepseq_rev : ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_rev : ('a,'sep) sepseq -> ('a,'sep) sepseq
(* Rightwards iterators *)
val nseq_foldl : ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
val nsepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
val sepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
val nseq_iter : ('a -> unit) -> 'a nseq -> unit
val nsepseq_iter : ('a -> unit) -> ('a,'b) nsepseq -> unit
val sepseq_iter : ('a -> unit) -> ('a,'b) sepseq -> unit
(* Leftwards iterators *)
val nseq_foldr : ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
val nsepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
val sepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
(* Conversions to lists *)
val nseq_to_list : 'a nseq -> 'a list
val nsepseq_to_list : ('a,'b) nsepseq -> 'a list
val sepseq_to_list : ('a,'b) sepseq -> 'a list
(* Effectful symbol generator *)
val gen_sym : unit -> string
(* General tracing function *)
val trace : string -> out_channel option -> unit
(* Printing a string in red to standard error *)
val highlight : string -> unit
(* Working with optional values *)
module Option:
sig
val apply : ('a -> 'b) -> 'a option -> 'b option
val rev_apply : ('a -> 'a) option -> 'a -> 'a
val to_string : string option -> string
end
(* An extension to the standard module [String] *)
module String :
sig
include module type of String
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end
(* Integer maps *)
module Int :
sig
type t = int
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end

View File

@ -1,42 +1,40 @@
(ocamllex Lexer) (ocamllex LexToken)
(menhir (menhir
(merge_into Parser) (merge_into Parser)
(modules ParToken Parser) (modules ParToken Parser)
(flags -la 1 --explain --external-tokens Token)) (flags -la 1 --explain --external-tokens LexToken))
(library (library
(name parser_ligodity) (name parser_ligodity)
(public_name ligo.parser.ligodity) (public_name ligo.parser.ligodity)
(modules AST ligodity Utils Version Lexer Parser Token) (modules AST ligodity Parser ParserLog LexToken)
;; (modules_without_implementation Error)
(libraries (libraries
parser_shared
str str
zarith
simple-utils simple-utils
tezos-utils tezos-utils
getopt getopt
) )
(flags (:standard -open Simple_utils )) (flags (:standard -open Simple_utils -open Parser_shared ))
) )
;; Les deux directives (rule) qui suivent sont pour le dev local. (executable
;; Il suffit de faire "dune build Parser.exe" pour avoir un Parser.exe dans le dossier. (name LexerMain)
;; Pour le purger, il faut faire "dune clean". (libraries
;(rule parser_ligodity)
; (targets Parser.exe) (modules
; (deps ParserMain.exe) LexerMain
; (action (copy ParserMain.exe Parser.exe)) )
; (mode (promote (until-clean) (only *)))) (flags (:standard -open Parser_shared -open Parser_ligodity))
)
;(rule (executable
; (targets Lexer.exe) (name ParserMain)
; (deps LexerMain.exe) (libraries
; (action (copy LexerMain.exe Lexer.exe)) parser_ligodity)
; (mode (promote (until-clean) (only *)))) (modules
ParserMain
(rule )
(targets Version.ml) (flags (:standard -open Simple_utils -open Parser_shared -open Parser_ligodity))
(action )
(progn (run "sh" "-c" "printf 'let version = \"%s\"'\\\\n \"$(echo UNKNOWN)\" > Version.ml")))
(mode (promote (until-clean) (only *))))

View File

@ -1,4 +1,5 @@
module Token = Token
module Lexer = Lexer
module AST = AST
module Parser = Parser module Parser = Parser
module AST = AST
module Lexer = Lexer
module LexToken = LexToken
module ParserLog = ParserLog

View File

@ -1,8 +1,10 @@
open Trace open Trace
open Parser_pascaligo
module Parser = Parser_pascaligo.Parser module Parser = Parser_pascaligo.Parser
module AST = Parser_pascaligo.AST module AST = Parser_pascaligo.AST
module ParserLog = Parser_pascaligo.ParserLog module ParserLog = Parser_pascaligo.ParserLog
module LexToken = Parser_pascaligo.LexToken
module Lexer = Lexer.Make(LexToken)
let parse_file (source: string) : AST.t result = let parse_file (source: string) : AST.t result =
let pp_input = let pp_input =
@ -18,7 +20,6 @@ let parse_file (source: string) : AST.t result =
generic_try (simple_error "error opening file") @@ generic_try (simple_error "error opening file") @@
(fun () -> open_in pp_input) in (fun () -> open_in pp_input) in
let lexbuf = Lexing.from_channel channel in let lexbuf = Lexing.from_channel channel in
let module Lexer = Lexer.Make(LexToken) in
let Lexer.{read ; close ; _} = let Lexer.{read ; close ; _} =
Lexer.open_token_stream None in Lexer.open_token_stream None in
specific_try (function specific_try (function
@ -54,9 +55,7 @@ let parse_file (source: string) : AST.t result =
ok raw ok raw
let parse_string (s:string) : AST.t result = let parse_string (s:string) : AST.t result =
let lexbuf = Lexing.from_string s in let lexbuf = Lexing.from_string s in
let module Lexer = Lexer.Make(LexToken) in
let Lexer.{read ; close ; _} = let Lexer.{read ; close ; _} =
Lexer.open_token_stream None in Lexer.open_token_stream None in
specific_try (function specific_try (function
@ -80,7 +79,6 @@ let parse_string (s:string) : AST.t result =
let parse_expression (s:string) : AST.expr result = let parse_expression (s:string) : AST.expr result =
let lexbuf = Lexing.from_string s in let lexbuf = Lexing.from_string s in
let module Lexer = Lexer.Make(LexToken) in
let Lexer.{read ; close; _} = let Lexer.{read ; close; _} =
Lexer.open_token_stream None in Lexer.open_token_stream None in
specific_try (function specific_try (function

View File

@ -192,7 +192,7 @@ and type_expr =
| TSum of (variant reg, vbar) nsepseq reg | TSum of (variant reg, vbar) nsepseq reg
| TRecord of record_type | TRecord of record_type
| TApp of (type_name * type_tuple) reg | TApp of (type_name * type_tuple) reg
| TFun of (type_expr * arrow * type_expr) reg | TFun of (type_expr * arrow * type_expr) reg
| TPar of type_expr par reg | TPar of type_expr par reg
| TAlias of variable | TAlias of variable

View File

@ -1,4 +1,4 @@
(* Abstract Syntax Tree (AST) for LIGO *) (* Abstract Syntax Tree (AST) for Pascaligo *)
[@@@warning "-30"] [@@@warning "-30"]

View File

@ -143,10 +143,14 @@ type int_err =
type ident_err = Reserved_name type ident_err = Reserved_name
type invalid_natural =
| Invalid_natural
| Non_canonical_zero_nat
val mk_string : lexeme -> Region.t -> token val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_nat : lexeme -> Region.t -> (token, int_err) result val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result
val mk_mtz : lexeme -> Region.t -> (token, int_err) result val mk_mtz : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token val mk_constr : lexeme -> Region.t -> token

View File

@ -484,14 +484,22 @@ let mk_int lexeme region =
then Error Non_canonical_zero then Error Non_canonical_zero
else Ok (Int Region.{region; value = lexeme, z}) else Ok (Int Region.{region; value = lexeme, z})
type invalid_natural =
| Invalid_natural
| Non_canonical_zero_nat
let mk_nat lexeme region = let mk_nat lexeme region =
let z = match (String.index_opt lexeme 'n') with
Str.(global_replace (regexp "_") "" lexeme) |> | None -> Error Invalid_natural
Str.(global_replace (regexp "n") "") |> | Some _ -> (
Z.of_string in let z =
if Z.equal z Z.zero && lexeme <> "0n" Str.(global_replace (regexp "_") "" lexeme) |>
then Error Non_canonical_zero Str.(global_replace (regexp "n") "") |>
else Ok (Nat Region.{region; value = lexeme, z}) Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme, z})
)
let mk_mtz lexeme region = let mk_mtz lexeme region =
let z = let z =

View File

@ -6,7 +6,7 @@ let () = Printexc.record_backtrace true
(* Running the lexer on the source *) (* Running the lexer on the source *)
let options = EvalOpt.read () let options = EvalOpt.read "PascaLIGO" ".ligo"
open EvalOpt open EvalOpt

View File

@ -133,7 +133,8 @@ type_decl:
kwd_is = $3; kwd_is = $3;
type_expr = $4; type_expr = $4;
terminator = $5} terminator = $5}
in {region; value}} in {region; value}
}
type_expr: type_expr:
cartesian { TProd $1 } cartesian { TProd $1 }

View File

@ -6,7 +6,7 @@ let () = Printexc.record_backtrace true
(* Reading the command-line options *) (* Reading the command-line options *)
let options = EvalOpt.read () let options = EvalOpt.read "Pascaligo" ".ligo"
open EvalOpt open EvalOpt

View File

@ -31,6 +31,16 @@
(flags (:standard -open Parser_shared -open Parser_pascaligo)) (flags (:standard -open Parser_shared -open Parser_pascaligo))
) )
(executable
(name ParserMain)
(libraries
parser_pascaligo)
(modules
ParserMain
)
(flags (:standard -open Simple_utils -open Parser_shared -open Parser_pascaligo))
)
;; Les deux directives (rule) qui suivent sont pour le dev local. ;; Les deux directives (rule) qui suivent sont pour le dev local.
;; Il suffit de faire "dune build Parser.exe" pour avoir un Parser.exe dans le dossier. ;; Il suffit de faire "dune build Parser.exe" pour avoir un Parser.exe dans le dossier.
;; Pour le purger, il faut faire "dune clean". ;; Pour le purger, il faut faire "dune clean".

View File

@ -1,4 +1,4 @@
(* Parsing the command-line options of PascaLIGO *) (* Parsing command-line options *)
(* The type [command] denotes some possible behaviours of the (* The type [command] denotes some possible behaviours of the
compiler. *) compiler. *)
@ -27,10 +27,10 @@ let abort msg =
(* Help *) (* Help *)
let help () = let help language extension () =
let file = Filename.basename Sys.argv.(0) in let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.ligo | \"-\"]\n" file; printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
print "where <input>.ligo is the PascaLIGO source file (default: stdin),"; printf "where <input>%s is the %s source file (default: stdin)," extension language;
print "and each <option> (if any) is one of the following:"; print "and each <option> (if any) is one of the following:";
print " -I <paths> Library paths (colon-separated)"; print " -I <paths> Library paths (colon-separated)";
print " -c, --copy Print lexemes of tokens and markup (lexer)"; print " -c, --copy Print lexemes of tokens and markup (lexer)";
@ -70,7 +70,7 @@ let add_verbose d =
!verbose !verbose
(split_at_colon d) (split_at_colon d)
let specs = let specs language extension =
let open! Getopt in [ let open! Getopt in [
'I', nolong, None, Some add_path; 'I', nolong, None, Some add_path;
'c', "copy", set copy true, None; 'c', "copy", set copy true, None;
@ -80,7 +80,7 @@ let specs =
noshort, "columns", set columns true, None; noshort, "columns", set columns true, None;
noshort, "bytes", set bytes true, None; noshort, "bytes", set bytes true, None;
noshort, "verbose", None, Some add_verbose; noshort, "verbose", None, Some add_verbose;
'h', "help", Some help, None; 'h', "help", Some (help language extension), None;
noshort, "version", Some version, None noshort, "version", Some version, None
] ]
;; ;;
@ -119,7 +119,7 @@ let print_opt () =
printf "libs = %s\n" (string_of_path !libs) printf "libs = %s\n" (string_of_path !libs)
;; ;;
let check () = let check extension =
let () = let () =
if Utils.String.Set.mem "cmdline" !verbose then print_opt () in if Utils.String.Set.mem "cmdline" !verbose then print_opt () in
@ -127,11 +127,11 @@ let check () =
match !input with match !input with
None | Some "-" -> !input None | Some "-" -> !input
| Some file_path -> | Some file_path ->
if Filename.check_suffix file_path ".ligo" if Filename.check_suffix file_path extension
then if Sys.file_exists file_path then if Sys.file_exists file_path
then Some file_path then Some file_path
else abort "Source file not found." else abort "Source file not found."
else abort "Source file lacks the extension .ligo." in else abort ("Source file lacks the extension " ^ extension ^ ".") in
(* Exporting remaining options as non-mutable values *) (* Exporting remaining options as non-mutable values *)
@ -172,12 +172,12 @@ let check () =
(* Parsing the command-line options *) (* Parsing the command-line options *)
let read () = let read language extension =
try try
Getopt.parse_cmdline specs anonymous; Getopt.parse_cmdline (specs language extension) anonymous;
(verb_str := (verb_str :=
let apply e a = let apply e a =
if a <> "" then Printf.sprintf "%s, %s" e a else e if a <> "" then Printf.sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""); in Utils.String.Set.fold apply !verbose "");
check () check extension
with Getopt.Error msg -> abort msg with Getopt.Error msg -> abort msg

View File

@ -49,4 +49,4 @@ type options = {
(* Parsing the command-line options on stdin *) (* Parsing the command-line options on stdin *)
val read : unit -> options val read : string -> string -> options

View File

@ -60,15 +60,18 @@ module type TOKEN =
(* Errors *) (* Errors *)
type int_err = Non_canonical_zero type int_err = Non_canonical_zero
type ident_err = Reserved_name type ident_err = Reserved_name
type invalid_natural =
| Invalid_natural
| Non_canonical_zero_nat
(* Injections *) (* Injections *)
val mk_string : lexeme -> Region.t -> token val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_nat : lexeme -> Region.t -> (token, int_err) result val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result
val mk_mtz : lexeme -> Region.t -> (token, int_err) result val mk_mtz : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token val mk_constr : lexeme -> Region.t -> token

View File

@ -101,15 +101,18 @@ module type TOKEN =
(* Errors *) (* Errors *)
type int_err = Non_canonical_zero type int_err = Non_canonical_zero
type ident_err = Reserved_name type ident_err = Reserved_name
type invalid_natural =
| Invalid_natural
| Non_canonical_zero_nat
(* Injections *) (* Injections *)
val mk_string : lexeme -> Region.t -> token val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_nat : lexeme -> Region.t -> (token, int_err) result val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result
val mk_mtz : lexeme -> Region.t -> (token, int_err) result val mk_mtz : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token val mk_constr : lexeme -> Region.t -> token
@ -340,6 +343,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
type Error.t += Broken_string type Error.t += Broken_string
type Error.t += Invalid_character_in_string type Error.t += Invalid_character_in_string
type Error.t += Reserved_name type Error.t += Reserved_name
type Error.t += Invalid_natural
let error_to_string = function let error_to_string = function
Invalid_utf8_sequence -> Invalid_utf8_sequence ->
@ -382,6 +386,8 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
| Reserved_name -> | Reserved_name ->
"Reserved named.\n\ "Reserved named.\n\
Hint: Change the name.\n" Hint: Change the name.\n"
| Invalid_natural ->
"Invalid natural."
| _ -> assert false | _ -> assert false
exception Error of Error.t Region.reg exception Error of Error.t Region.reg
@ -421,8 +427,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
let region, lexeme, state = sync state buffer in let region, lexeme, state = sync state buffer in
match Token.mk_nat lexeme region with match Token.mk_nat lexeme region with
Ok token -> token, state Ok token -> token, state
| Error Token.Non_canonical_zero -> | Error Token.Non_canonical_zero_nat ->
fail region Non_canonical_zero fail region Non_canonical_zero
| Error Token.Invalid_natural ->
fail region Invalid_natural
let mk_mtz state buffer = let mk_mtz state buffer =
let region, lexeme, state = sync state buffer in let region, lexeme, state = sync state buffer in
@ -431,6 +439,43 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
| Error Token.Non_canonical_zero -> | Error Token.Non_canonical_zero ->
fail region Non_canonical_zero fail region Non_canonical_zero
let mk_tz state buffer =
let region, lexeme, state = sync state buffer in
let lexeme = Str.string_before lexeme (String.index lexeme 't') in
let lexeme = Z.mul (Z.of_int 1_000_000) (Z.of_string lexeme) in
match Token.mk_mtz (Z.to_string lexeme ^ "mtz") region with
Ok token -> token, state
| Error Token.Non_canonical_zero ->
fail region Non_canonical_zero
let format_tz s =
match String.index s '.' with
index ->
let len = String.length s in
let integral = Str.first_chars s index
and fractional = Str.last_chars s (len-index-1) in
let num = Z.of_string (integral ^ fractional)
and den = Z.of_string ("1" ^ String.make (len-index-1) '0')
and million = Q.of_string "1000000" in
let mtz = Q.make num den |> Q.mul million in
let should_be_1 = Q.den mtz in
if Z.equal Z.one should_be_1 then Some (Q.num mtz) else None
| exception Not_found -> assert false
let mk_tz_decimal state buffer =
let region, lexeme, state = sync state buffer in
let lexeme = Str.string_before lexeme (String.index lexeme 't') in
match format_tz lexeme with
| Some tz -> (
match Token.mk_mtz (Z.to_string tz ^ "mtz") region with
Ok token ->
token, state
| Error Token.Non_canonical_zero ->
fail region Non_canonical_zero
)
| None -> assert false
let mk_ident state buffer = let mk_ident state buffer =
let region, lexeme, state = sync state buffer in let region, lexeme, state = sync state buffer in
match Token.mk_ident lexeme region with match Token.mk_ident lexeme region with
@ -462,10 +507,11 @@ let blank = ' ' | '\t'
let digit = ['0'-'9'] let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural let integer = '-'? natural
let decimal = digit+ '.' digit+
let small = ['a'-'z'] let small = ['a'-'z']
let capital = ['A'-'Z'] let capital = ['A'-'Z']
let letter = small | capital let letter = small | capital
let ident = small (letter | '_' | digit)* let ident = small (letter | '_' | digit | '%')*
let constr = capital (letter | '_' | digit)* let constr = capital (letter | '_' | digit)*
let hexa_digit = digit | ['A'-'F'] let hexa_digit = digit | ['A'-'F']
let byte = hexa_digit hexa_digit let byte = hexa_digit hexa_digit
@ -477,6 +523,7 @@ let symbol = ';' | ',' | '(' | ')'| '[' | ']' | '{' | '}'
| '#' | '|' | "->" | ":=" | '=' | ':' | '#' | '|' | "->" | ":=" | '=' | ':'
| '<' | "<=" | '>' | ">=" | "=/=" | '<' | "<=" | '>' | ">=" | "=/="
| '+' | '-' | '*' | '/' | '.' | '_' | '^' | '+' | '-' | '*' | '/' | '.' | '_' | '^'
| "::" | "||" | "&&"
let string = [^'"' '\\' '\n']* (* For strings of #include *) let string = [^'"' '\\' '\n']* (* For strings of #include *)
(* RULES *) (* RULES *)
@ -497,12 +544,14 @@ and scan state = parse
nl { scan (push_newline state lexbuf) lexbuf } nl { scan (push_newline state lexbuf) lexbuf }
| ' '+ { scan (push_space state lexbuf) lexbuf } | ' '+ { scan (push_space state lexbuf) lexbuf }
| '\t'+ { scan (push_tabs state lexbuf) lexbuf } | '\t'+ { scan (push_tabs state lexbuf) lexbuf }
| ident { mk_ident state lexbuf |> enqueue } | ident { mk_ident state lexbuf |> enqueue }
| constr { mk_constr state lexbuf |> enqueue } | constr { mk_constr state lexbuf |> enqueue }
| bytes { (mk_bytes seq) state lexbuf |> enqueue } | bytes { (mk_bytes seq) state lexbuf |> enqueue }
| natural 'n' { mk_nat state lexbuf |> enqueue } | natural 'n' { mk_nat state lexbuf |> enqueue }
| natural "mtz" { mk_mtz state lexbuf |> enqueue } | natural 'p' { mk_nat state lexbuf |> enqueue }
| natural "mtz" { mk_mtz state lexbuf |> enqueue }
| natural "tz" { mk_tz state lexbuf |> enqueue }
| decimal "tz" { mk_tz_decimal state lexbuf |> enqueue }
| integer { mk_int state lexbuf |> enqueue } | integer { mk_int state lexbuf |> enqueue }
| symbol { mk_sym state lexbuf |> enqueue } | symbol { mk_sym state lexbuf |> enqueue }
| eof { mk_eof state lexbuf |> enqueue } | eof { mk_eof state lexbuf |> enqueue }

View File

@ -7,6 +7,7 @@
simple-utils simple-utils
uutf uutf
getopt getopt
zarith
) )
(modules (modules
Error Error

View File

@ -121,7 +121,7 @@ module Errors = struct
let message () = "" in let message () = "" in
let data = [ let data = [
("expression" , ("expression" ,
thunk @@ Format.asprintf "%a" (PP_helpers.printer Raw.print_expr) t) thunk @@ Format.asprintf "%a" PP_helpers.(printer Parser.Ligodity.ParserLog.print_expr) t)
] in ] in
error ~data title message error ~data title message
@ -751,7 +751,7 @@ and simpl_cases : type a . (Raw.pattern * a) list -> a matching result =
let error x = let error x =
let title () = "Pattern" in let title () = "Pattern" in
let content () = let content () =
Format.asprintf "Pattern : %a" (PP_helpers.printer Raw.print_pattern) x in Format.asprintf "Pattern : %a" (PP_helpers.printer Parser.Ligodity.ParserLog.print_pattern) x in
error title content error title content
in in
let as_variant () = let as_variant () =
@ -770,7 +770,7 @@ and simpl_cases : type a . (Raw.pattern * a) list -> a matching result =
ok @@ Match_variant constrs ok @@ Match_variant constrs
in in
let as_option () = let as_option () =
let aux (x , y) = let aux (x , y) =
let%bind x' = let%bind x' =
trace (error x) @@ trace (error x) @@
get_constr_opt x get_constr_opt x

View File

@ -1,4 +1,4 @@
const add_tez : tez = 21mtz + 21mtz; const add_tez : tez = 21mtz + 0.000021tz;
const sub_tez : tez = 21mtz - 20mtz; const sub_tez : tez = 21mtz - 20mtz;
(* is this enough? *) (* is this enough? *)
const not_enough_tez : tez = 4611686018427387903mtz; const not_enough_tez : tez = 4611686018427387903mtz;

View File

@ -1,4 +1,4 @@
let add_tez : tez = 0.000021tz + 0.000021tz let add_tez : tez = 21mtz + 0.000021tz
let sub_tez : tez = 0.000021tz - 0.000020tz let sub_tez : tez = 0.000021tz - 0.000020tz
let not_enough_tez : tez = 4611686018427.387903tz let not_enough_tez : tez = 4611686018427.387903tz

View File

@ -0,0 +1,10 @@
type foobar =
| Foo of int
| Bar of bool
| Kee of nat
let foo : foobar = Foo 42
let bar : foobar = Bar true
let kee : foobar = Kee 23p

View File

@ -54,6 +54,19 @@ let variant () : unit result =
expect_eq_evaluate program "kee" expected in expect_eq_evaluate program "kee" expected in
ok () ok ()
let variant_mligo () : unit result =
let%bind program = mtype_file "./contracts/variant.mligo" in
let%bind () =
let expected = e_constructor "Foo" (e_int 42) in
expect_eq_evaluate program "foo" expected in
let%bind () =
let expected = e_constructor "Bar" (e_bool true) in
expect_eq_evaluate program "bar" expected in
let%bind () =
let expected = e_constructor "Kee" (e_nat 23) in
expect_eq_evaluate program "kee" expected in
ok ()
let variant_matching () : unit result = let variant_matching () : unit result =
let%bind program = type_file "./contracts/variant-matching.ligo" in let%bind program = type_file "./contracts/variant-matching.ligo" in
let%bind () = let%bind () =
@ -818,6 +831,7 @@ let main = test_suite "Integration (End to End)" [
test "shared function" shared_function ; test "shared function" shared_function ;
test "higher order" higher_order ; test "higher order" higher_order ;
test "variant" variant ; test "variant" variant ;
test "variant (mligo)" variant_mligo ;
test "variant matching" variant_matching ; test "variant matching" variant_matching ;
test "tuple" tuple ; test "tuple" tuple ;
test "record" record ; test "record" record ;