[WIP] Refactoring of front-end.

Christian Rinderknecht 2020-01-20 10:57:07 +01:00
parent 673b54e6ae
commit 8384e3d1f7
28 changed files with 855 additions and 417 deletions

View File

@@ -312,6 +312,7 @@ and comp_expr =
 | Neq of neq bin_op reg

 and record = field_assign reg ne_injection

 and projection = {
   struct_name : variable;
   selector    : dot;

@@ -335,6 +336,7 @@ and update = {
   updates : record reg;
   rbrace  : rbrace;
 }

 and path =
   Name of variable
 | Path of projection reg

@@ -376,7 +378,7 @@ and cond_expr = {
   ifso     : expr;
   kwd_else : kwd_else;
   ifnot    : expr
 }

 (* Projecting regions from some nodes of the AST *)

View File

@@ -1,129 +1,148 @@
 open Trace

-module Parser = Parser_pascaligo.Parser
-module AST = Parser_pascaligo.AST
+(*module Parser = Parser_pascaligo.Parser*)
 (*module ParserLog = Parser_pascaligo.ParserLog*)
+module AST = Parser_pascaligo.AST
+module ParErr = Parser_pascaligo.ParErr
 module LexToken = Parser_pascaligo.LexToken
 module Lexer = Lexer.Make(LexToken)
 module Scoping = Parser_pascaligo.Scoping
+module SSet = Utils.String.Set
+
+(* Mock options. TODO: Plug in cmdliner. *)
+
+let pre_options =
+  EvalOpt.make
+    ~libs:[]
+    ~verbose:SSet.empty
+    ~offsets:true
+    ~mode:`Point
+    ~cmd:EvalOpt.Quiet
+    ~mono:true (* Monolithic API of Menhir for now *)
+    (* ~input:None *)
+    (* ~expr:true *)
+
+module Parser =
+  struct
+    type ast = AST.t
+    type expr = AST.expr
+    include Parser_pascaligo.Parser
+  end
+
+module ParserLog =
+  struct
+    type ast = AST.t
+    type expr = AST.expr
+    include Parser_pascaligo.ParserLog
+  end
+
+module PreUnit = ParserUnit.Make (Lexer)(AST)(Parser)(ParErr)(ParserLog)
+module Front = ParserAPI.Make (Lexer)(Parser)(ParErr)
+
+let issue_error point =
+  let error = Front.format_error ~offsets:true (* TODO: CLI *)
+                                 `Point (* TODO: CLI *) point
+  in Stdlib.Error error

-module Errors = struct
-  let lexer_error (e: Lexer.error AST.reg) =
-    let title () = "lexer error" in
-    let message () = Lexer.error_to_string e.value in
-    let data = [
-      ("parser_loc",
-       fun () -> Format.asprintf "%a" Location.pp_lift @@ e.region
-      )
-    ] in
-    error ~data title message
-
-  let reserved_name Region.{value; region} =
-    let title () = Printf.sprintf "reserved name \"%s\"" value in
-    let message () = "" in
-    let data = [
-      ("location",
-       fun () -> Format.asprintf "%a" Location.pp_lift @@ region)
-    ] in
-    error ~data title message
-
-  let non_linear_pattern Region.{value; region} =
-    let title () = Printf.sprintf "repeated variable \"%s\" in this pattern" value in
-    let message () = "" in
-    let data = [
-      ("location",
-       fun () -> Format.asprintf "%a" Location.pp_lift @@ region)
-    ] in
-    error ~data title message
-
-  let duplicate_parameter Region.{value; region} =
-    let title () = Printf.sprintf "duplicate parameter \"%s\"" value in
-    let message () = "" in
-    let data = [
-      ("location",
-       fun () -> Format.asprintf "%a" Location.pp_lift @@ region)
-    ] in
-    error ~data title message
-
-  let duplicate_variant Region.{value; region} =
-    let title () = Printf.sprintf "duplicate variant \"%s\" in this\
-type declaration" value in
-    let message () = "" in
-    let data = [
-      ("location",
-       fun () -> Format.asprintf "%a" Location.pp_lift @@ region)
-    ] in
-    error ~data title message
-
-  let detached_attributes (attrs: AST.attributes) =
-    let title () = "detached attributes" in
-    let message () = "" in
-    let data = [
-      ("location",
-       fun () -> Format.asprintf "%a" Location.pp_lift @@ attrs.region)
-    ] in
-    error ~data title message
-
-  let parser_error source (start: Lexing.position) (end_: Lexing.position) lexbuf =
-    let title () = "parser error" in
-    let file = if source = "" then
-        ""
-      else
-        Format.sprintf "In file \"%s|%s\"" start.pos_fname source
-    in
-    let str = Format.sprintf
-      "Parse error at \"%s\" from (%d, %d) to (%d, %d). %s\n"
-      (Lexing.lexeme lexbuf)
-      start.pos_lnum (start.pos_cnum - start.pos_bol)
-      end_.pos_lnum (end_.pos_cnum - end_.pos_bol)
-      file
-    in
-    let message () = str in
-    let loc = if start.pos_cnum = -1 then
-        Region.make
-          ~start: Pos.min
-          ~stop:(Pos.from_byte end_)
-      else
-        Region.make
-          ~start:(Pos.from_byte start)
-          ~stop:(Pos.from_byte end_)
-    in
-    let data =
-      [
-        ("parser_loc",
-         fun () -> Format.asprintf "%a" Location.pp_lift @@ loc
-        )
-      ]
-    in
-    error ~data title message
-
-  let unrecognized_error source (start: Lexing.position) (end_: Lexing.position) lexbuf =
-    let title () = "unrecognized error" in
-    let file = if source = "" then
-        ""
-      else
-        Format.sprintf "In file \"%s|%s\"" start.pos_fname source
-    in
-    let str = Format.sprintf
-      "Parse error at \"%s\" from (%d, %d) to (%d, %d). %s\n"
-      (Lexing.lexeme lexbuf)
-      start.pos_lnum (start.pos_cnum - start.pos_bol)
-      end_.pos_lnum (end_.pos_cnum - end_.pos_bol)
-      file
-    in
-    let message () = str in
-    let loc = Region.make
-      ~start:(Pos.from_byte start)
-      ~stop:(Pos.from_byte end_)
-    in
-    let data = [
-      ("unrecognized_loc",
-       fun () -> Format.asprintf "%a" Location.pp_lift @@ loc
-      )
-    ] in
-    error ~data title message
-end
+module Errors =
+  struct
+    let reserved_name Region.{value; region} =
+      let title () = Printf.sprintf "reserved name \"%s\"" value in
+      let message () = "" in
+      let data = [
+        ("location",
+         fun () -> Format.asprintf "%a" Location.pp_lift @@ region)]
+      in error ~data title message
+
+    let non_linear_pattern Region.{value; region} =
+      let title () =
+        Printf.sprintf "repeated variable \"%s\" in this pattern" value in
+      let message () = "" in
+      let data = [
+        ("location",
+         fun () -> Format.asprintf "%a" Location.pp_lift @@ region)]
+      in error ~data title message
+
+    let duplicate_parameter Region.{value; region} =
+      let title () =
+        Printf.sprintf "duplicate parameter \"%s\"" value in
+      let message () = "" in
+      let data = [
+        ("location",
+         fun () -> Format.asprintf "%a" Location.pp_lift @@ region)]
+      in error ~data title message
+
+    let duplicate_variant Region.{value; region} =
+      let title () =
+        Printf.sprintf "duplicate variant \"%s\" in this \
+                        type declaration" value in
+      let message () = "" in
+      let data = [
+        ("location",
+         fun () -> Format.asprintf "%a" Location.pp_lift @@ region)]
+      in error ~data title message
+
+    let unrecognized_error source (start: Lexing.position)
+                           (stop: Lexing.position) lexbuf =
+      let title () = "unrecognized error" in
+      let file =
+        if source = "" then ""
+        else
+          Format.sprintf "In file \"%s|%s\"" start.pos_fname source in
+      let message () =
+        Format.sprintf
+          "Parse error at \"%s\" from (%d, %d) to (%d, %d). %s\n"
+          (Lexing.lexeme lexbuf)
+          start.pos_lnum (start.pos_cnum - start.pos_bol)
+          stop.pos_lnum (stop.pos_cnum - stop.pos_bol)
+          file in
+      let loc = Region.make ~start:(Pos.from_byte start)
+                            ~stop:(Pos.from_byte stop) in
+      let data = [
+        ("unrecognized_loc",
+         fun () -> Format.asprintf "%a" Location.pp_lift @@ loc)]
+      in error ~data title message
+
+    let detached_attributes (attrs: AST.attributes) =
+      let title () = "detached attributes" in
+      let message () = "" in
+      let data = [
+        ("location",
+         fun () -> Format.asprintf "%a" Location.pp_lift @@ attrs.region)]
+      in error ~data title message
+
+    let parser_error source (start: Lexing.position)
+                     (stop: Lexing.position) lexbuf =
+      let title () = "parser error" in
+      let file =
+        if source = "" then ""
+        else
+          Format.sprintf "In file \"%s|%s\"" start.pos_fname source in
+      let message () =
+        Format.sprintf
+          "Parse error at \"%s\" from (%d, %d) to (%d, %d). %s\n"
+          (Lexing.lexeme lexbuf)
+          start.pos_lnum (start.pos_cnum - start.pos_bol)
+          stop.pos_lnum (stop.pos_cnum - stop.pos_bol)
+          file in
+      let loc =
+        if start.pos_cnum = -1 then
+          Region.make ~start: Pos.min ~stop:(Pos.from_byte stop)
+        else
+          Region.make ~start:(Pos.from_byte start)
+                      ~stop:(Pos.from_byte stop) in
+      let data =
+        [("parser_loc",
+          fun () -> Format.asprintf "%a" Location.pp_lift @@ loc)] in
+      error ~data title message
+
+    let lexer_error (e: Lexer.error AST.reg) =
+      let title () = "lexer error" in
+      let message () = Lexer.error_to_string e.value in
+      let data = [
+        ("parser_loc",
+         fun () -> Format.asprintf "%a" Location.pp_lift @@ e.region)]
+      in error ~data title message
+  end

 open Errors

@@ -131,35 +150,37 @@ open Errors

 type 'a parser = (Lexing.lexbuf -> LexToken.token) -> Lexing.lexbuf -> 'a

 let parse (parser: 'a parser) source lexbuf =
-  let Lexer.{read ; close ; _} = Lexer.open_token_stream None in
+  let Lexer.{read; close; _} = Lexer.open_token_stream None in
   let result =
-    try
-      ok (parser read lexbuf)
-    with
-      Scoping.Error (Scoping.Non_linear_pattern var) ->
-        fail @@ (non_linear_pattern var)
+    try ok (parser read lexbuf) with
+      Lexer.Error e ->
+        fail @@ lexer_error e
+    | Parser.Error ->
+        let start = Lexing.lexeme_start_p lexbuf in
+        let stop = Lexing.lexeme_end_p lexbuf in
+        fail @@ parser_error source start stop lexbuf
+    | Scoping.Error (Scoping.Non_linear_pattern var) ->
+        fail @@ non_linear_pattern var
     | Scoping.Error (Duplicate_parameter name) ->
-        fail @@ (duplicate_parameter name)
+        fail @@ duplicate_parameter name
     | Scoping.Error (Duplicate_variant name) ->
-        fail @@ (duplicate_variant name)
+        fail @@ duplicate_variant name
     | Scoping.Error (Reserved_name name) ->
-        fail @@ (reserved_name name)
-    | SyntaxError.Error (Detached_attributes attrs) ->
-        fail @@ (detached_attributes attrs)
+        fail @@ reserved_name name
+    | Scoping.Error (Detached_attributes attrs) ->
+        fail @@ detached_attributes attrs
     | Parser.Error ->
        let start = Lexing.lexeme_start_p lexbuf in
        let end_ = Lexing.lexeme_end_p lexbuf in
        fail @@ (parser_error source start end_ lexbuf)
    | Lexer.Error e ->
-        fail @@ (lexer_error e)
+        fail @@ lexer_error e
    | _ ->
-        let _ = Printexc.print_backtrace Pervasives.stdout in
+        let () = Printexc.print_backtrace Pervasives.stdout in
        let start = Lexing.lexeme_start_p lexbuf in
-        let end_ = Lexing.lexeme_end_p lexbuf in
-        fail @@ (unrecognized_error source start end_ lexbuf)
-  in
-  close ();
-  result
+        let stop = Lexing.lexeme_end_p lexbuf in
+        fail @@ unrecognized_error source start stop lexbuf
+  in close (); result

 let parse_file (source: string) : AST.t result =
   let pp_input =

@@ -177,6 +198,17 @@ let parse_file (source: string) : AST.t result =
   let lexbuf = Lexing.from_channel channel in
   parse (Parser.contract) source lexbuf

+let parse_file' (source: string) : AST.t result =
+  let module IO =
+    struct
+      let ext = "ligo"
+      let options = pre_options ~input:(Some source) ~expr:false
+    end in
+  let module Unit = PreUnit(IO) in
+  match Unit.parse Unit.parse_contract with
+    Ok ast -> ok ast
+  | Error error -> failwith "TODO" (* fail @@ parser_or_lexer_error error *)
+
 let parse_string (s:string) : AST.t result =
   let lexbuf = Lexing.from_string s in
   parse (Parser.contract) "" lexbuf
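
Note on the driver above: pre_options is EvalOpt.make partially applied, so each call site supplies only the two remaining labelled arguments, exactly as parse_file' does. A minimal sketch (the helper name options_for is ours):

let options_for (source : string) : EvalOpt.options =
  (* Same partial application as in parse_file' above. *)
  pre_options ~input:(Some source) ~expr:false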

View File

@@ -20,4 +20,6 @@ $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml
 ../shared/LexerUnit.ml
 ../shared/ParserUnit.mli
 ../shared/ParserUnit.ml
+../shared/Memo.mli
+../shared/Memo.ml
 Stubs/Simple_utils.ml

View File

@@ -25,6 +25,7 @@ type 'a reg = 'a Region.reg
 type keyword = Region.t
 type kwd_and = Region.t
+type kwd_attributes = Region.t
 type kwd_begin = Region.t
 type kwd_block = Region.t
 type kwd_case = Region.t

@@ -109,7 +110,7 @@ type field_name = string reg
 type map_name = string reg
 type set_name = string reg
 type constr = string reg
 type attribute = string reg

 (* Parentheses *)

@@ -144,12 +145,12 @@ type t = {
 and ast = t

-and attributes = attribute list reg
+and attributes = attribute ne_injection reg

 and declaration =
   TypeDecl of type_decl reg
 | ConstDecl of const_decl reg
 | FunDecl of fun_decl reg

 and const_decl = {
   kwd_const : kwd_const;

@@ -159,7 +160,7 @@ and const_decl = {
   equal : equal;
   init : expr;
   terminator : semi option;
-  attributes : attributes;
+  attributes : attributes option
 }

 (* Type declarations *)

@@ -217,7 +218,7 @@ and fun_decl = {
   block_with : (block reg * kwd_with) option;
   return : expr;
   terminator : semi option;
-  attributes : attributes;
+  attributes : attributes option;
 }

 and parameters = (param_decl, semi) nsepseq par reg

@@ -562,6 +563,7 @@ and field_assign = {
   equal : equal;
   field_expr : expr
 }

 and record = field_assign reg ne_injection

 and projection = {

View File

@@ -28,6 +28,11 @@ type lexeme = string

 (* TOKENS *)

+type attribute = {
+  header : string;
+  string : lexeme Region.reg
+}
+
 type t =
   (* Literals *)

@@ -38,6 +43,7 @@ type t =
 | Mutez of (lexeme * Z.t) Region.reg
 | Ident of lexeme Region.reg
 | Constr of lexeme Region.reg
+| Attr of attribute

 (* Symbols *)

@@ -151,8 +157,7 @@ val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result
 val mk_string : lexeme -> Region.t -> token
 val mk_bytes : lexeme -> Region.t -> token
 val mk_constr : lexeme -> Region.t -> token
-val mk_attr : lexeme -> Region.t -> (token, attr_err) result
-val mk_attr2 : lexeme -> Region.t -> (token, attr_err) result
+val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result
 val eof : Region.t -> token

 (* Predicates *)

View File

@@ -26,6 +26,11 @@ let rollback buffer =

 (* TOKENS *)

+type attribute = {
+  header : string;
+  string : lexeme Region.reg
+}
+
 type t =
   (* Literals *)

@@ -33,9 +38,10 @@ type t =
 | Bytes of (lexeme * Hex.t) Region.reg
 | Int of (lexeme * Z.t) Region.reg
 | Nat of (lexeme * Z.t) Region.reg
 | Mutez of (lexeme * Z.t) Region.reg
 | Ident of lexeme Region.reg
 | Constr of lexeme Region.reg
+| Attr of attribute

 (* Symbols *)

@@ -144,6 +150,9 @@ let proj_token = function
 | Constr Region.{region; value} ->
     region, sprintf "Constr \"%s\"" value

+| Attr {header; string={region; value}} ->
+    region, sprintf "Attr (\"%s\",\"%s\")" header value
+
 (* Symbols *)

 | SEMI region -> region, "SEMI"

@@ -217,7 +226,7 @@ let proj_token = function
 | C_None region -> region, "C_None"
 | C_Some region -> region, "C_Some"

 (* Virtual tokens *)

 | EOF region -> region, "EOF"

@@ -233,6 +242,7 @@ let to_lexeme = function
 | Mutez i -> fst i.Region.value
 | Ident id
 | Constr id -> id.Region.value
+| Attr {string; _} -> string.Region.value

 (* Symbols *)

@@ -312,6 +322,7 @@ let to_lexeme = function
 | EOF _ -> ""

+(* CONVERSIONS *)
+
 let to_string token ?(offsets=true) mode =
   let region, val_str = proj_token token in

@@ -365,7 +376,7 @@ let keywords = [
 let reserved =
   let open SSet in
-  empty |> add "args"
+  empty |> add "arguments"

 let constructors = [
   (fun reg -> False reg);

@@ -489,8 +500,6 @@ let eof region = EOF region

 type sym_err = Invalid_symbol

-type attr_err = Invalid_attribute
-
 let mk_sym lexeme region =
   match lexeme with
   (* Lexemes in common with all concrete syntaxes *)

@@ -539,10 +548,9 @@ let mk_constr lexeme region =

 (* Attributes *)

-let mk_attr _lexeme _region =
-  Error Invalid_attribute
+type attr_err = Invalid_attribute

-let mk_attr2 _lexeme _region =
-  Error Invalid_attribute
+let mk_attr _header _string _region =
+  Error Invalid_attribute

 (* Predicates *)
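
For reference, a minimal sketch of what an Attr token looks like and how proj_token renders it, once mk_attr accepts attributes (the "inline" lexeme and the ghost region are made up for illustration):

let example_attr : t =
  Attr {header = "[@";
        string = Region.{region = Region.ghost; value = "inline"}}

let () =
  let _region, repr = proj_token example_attr in
  print_endline repr  (* prints: Attr ("[@","inline") *)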

View File

@@ -7,3 +7,8 @@ module IO =
   end

 module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
+
+let () =
+  match M.trace () with
+    Stdlib.Ok _ -> ()
+  | Error msg -> Printf.eprintf "\027[31m%s\027[0m%!" msg

View File

@@ -12,6 +12,7 @@
 %token <(LexToken.lexeme * Z.t) Region.reg> Mutez "<mutez>"
 %token <LexToken.lexeme Region.reg> Ident "<ident>"
 %token <LexToken.lexeme Region.reg> Constr "<constr>"
+%token <LexToken.attribute Region.reg> Attr "<attr>"

 (* Symbols *)

View File

@@ -5,39 +5,40 @@
 open Region
 open AST

+(*
 type statement_attributes_mixed =
   PInstr of instruction
 | PData of data_decl
-| PAttributes of attributes
+| PAttr of attributes

 let attributes_to_statement (statement, statements) =
-  if (List.length statements = 0) then
-    match statement with
-    | PInstr i -> Instr i, []
-    | PData d -> Data d, []
-    | PAttributes a ->
-        let open! SyntaxError in
-        raise (Error (Detached_attributes a))
-  else (
+  match statements with
+    [] ->
+     (match statement with
+        PInstr i -> Instr i, []
+      | PData d -> Data d, []
+      | PAttr a ->
+          raise (Scoping.Error (Scoping.Detached_attributes a)))
+  | _ -> (
     let statements = (Region.ghost, statement) :: statements in
     let rec inner result = function
-      (t, PData (LocalConst const)) :: (_, PAttributes a) :: rest ->
+      (t, PData (LocalConst const)) :: (_, PAttr a) :: rest ->
        inner (result @ [(t, Data (LocalConst {const with value = {const.value with attributes = a}}))]) rest
-    | (t, PData (LocalFun func)) :: (_, PAttributes a) :: rest ->
+    | (t, PData (LocalFun func)) :: (_, PAttr a) :: rest ->
        inner (result @ [(t, Data (LocalFun {func with value = {func.value with attributes = a}}))]) rest
    | (t, PData d) :: rest ->
        inner (result @ [(t, Data d)]) rest
    | (t, PInstr i) :: rest ->
        inner (result @ [(t, Instr i)]) rest
-    | (_, PAttributes _) :: rest ->
+    | (_, PAttr _) :: rest ->
        inner result rest
    | [] ->
        result
    in
    let result = inner [] statements in
    (snd (List.hd result), List.tl result)
  )
+*)

 (* END HEADER *)
 %}

@@ -145,7 +146,7 @@ contract:
 declaration:
   type_decl  { TypeDecl $1 }
 | const_decl { ConstDecl $1 }
 | fun_decl   { FunDecl $1 }

 (* Type declarations *)

@@ -258,7 +259,7 @@ field_decl:
     and value = {field_name=$1; colon=$2; field_type=$3}
     in {region; value} }

 fun_expr:
   "function" parameters ":" type_expr "is" expr {
     let stop = expr_to_region $6 in

@@ -290,7 +291,7 @@ open_fun_decl:
       block_with = Some ($7, $8);
       return = $9;
       terminator = None;
-      attributes = {value = []; region = Region.ghost}}
+      attributes = None}
     in {region; value} }
 | "function" fun_name parameters ":" type_expr "is" expr {
     Scoping.check_reserved_name $2;

@@ -305,14 +306,16 @@ open_fun_decl:
       block_with = None;
       return = $7;
       terminator = None;
-      attributes = {value = []; region = Region.ghost}}
+      attributes = None}
     in {region; value} }

 fun_decl:
-  open_fun_decl semi_attributes {
-    let attributes, terminator = $2 in
-    {$1 with value = {$1.value with terminator = terminator; attributes = attributes}}
-  }
+  open_fun_decl maybe_attributes? {
+    match $2 with
+      None -> $1
+    | Some (terminator, attributes) ->
+        let value = {$1.value with terminator; attributes}
+        in {$1 with value} }

 parameters:
   par(nsepseq(param_decl,";")) {

@@ -350,7 +353,7 @@ block:
     let statements, terminator = $2 in
     let region = cover $1 $3
     and value = {opening = Begin $1;
-                 statements = attributes_to_statement statements;
+                 statements (*= attributes_to_statement statements*);
                  terminator;
                  closing = End $3}
     in {region; value}

@@ -359,15 +362,15 @@ block:
     let statements, terminator = $3 in
     let region = cover $1 $4
     and value = {opening = Block ($1,$2);
-                 statements = attributes_to_statement statements;
+                 statements (*= attributes_to_statement statements*);
                  terminator;
                  closing = Block $4}
     in {region; value} }

 statement:
-  instruction { PInstr $1 }
-| open_data_decl { PData $1 }
-| attributes { PAttributes $1 }
+  instruction { (*P*)Instr $1 }
+| open_data_decl { (*P*)Data $1 }
+(*| attributes { PAttr $1 }*)

 open_data_decl:
   open_const_decl { LocalConst $1 }

@@ -385,10 +388,9 @@ open_const_decl:
       equal;
       init;
       terminator = None;
-      attributes = {value = []; region = Region.ghost}}
+      attributes = None}
     in {region; value} }

 open_var_decl:
   "var" unqualified_decl(":=") {
     let name, colon, var_type, assign, init, stop = $2 in

@@ -399,8 +401,7 @@ open_var_decl:
       var_type;
       assign;
       init;
-      terminator = None;
-      }
+      terminator=None}
     in {region; value} }

 unqualified_decl(OP):

@@ -409,23 +410,20 @@ unqualified_decl(OP):
     let region = expr_to_region $5
     in $1, $2, $3, $4, $5, region }

 attributes:
-  "attributes" "[" nsepseq(String,";") "]" {
-    let region = cover $1 $4 in
-    let value = (Utils.nsepseq_to_list $3) in
-    {region; value}
-  }
+  ne_injection("attributes","<string>") { $1 }

-semi_attributes:
-  /* empty */ { {value = []; region = Region.ghost}, None }
-| ";" { {value = []; region = Region.ghost}, Some $1 }
-| ";" attributes ";" { $2, Some $1 }
+maybe_attributes:
+  ";" { Some $1, None }
+| ";" attributes ";" { Some $1, Some $2 }

 const_decl:
-  open_const_decl semi_attributes {
-    let attributes, terminator = $2 in
-    {$1 with value = {$1.value with terminator = terminator; attributes = attributes }}
-  }
+  open_const_decl maybe_attributes? {
+    match $2 with
+      None -> $1
+    | Some (terminator, attributes) ->
+        let value = {$1.value with terminator; attributes}
+        in {$1 with value} }

 instruction:
   conditional { Cond $1 }

@@ -589,7 +587,7 @@ clause_block:
     let statements, terminator = $2 in
     let region = cover $1 $3 in
     let value = {lbrace = $1;
-                 inside = attributes_to_statement statements, terminator;
+                 inside = (*attributes_to_statement*) statements, terminator;
                  rbrace = $3} in
     ShortBlock {value; region} }
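
With maybe_attributes, attributes now trail the declaration's terminator, as in the hypothetical PascaLIGO source const four : int = 4; attributes ["inline"];. A plain-OCaml sketch of what the fun_decl/const_decl actions compute (field types simplified to strings; names are ours):

type decl = {terminator : string option; attributes : string list option}

(* [open_decl] is the open declaration ($1); the second argument is
   the optional (";", attributes) pair bound to $2 above. *)
let close_decl (open_decl : decl) = function
  None -> open_decl
| Some (terminator, attributes) -> {terminator; attributes}

let () =
  let d = close_decl {terminator = None; attributes = None}
                     (Some (Some ";", Some ["inline"])) in
  assert (d.attributes = Some ["inline"])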

View File

@@ -114,12 +114,10 @@ let rec print_tokens state ast =
   Utils.nseq_iter (print_decl state) decl;
   print_token state eof "EOF"

-and print_attributes state attributes =
-  let attributes = List.fold_left (fun all a -> all ^ a.value ^ ";") "" attributes.value in
-  let line =
-    sprintf "attributes[%s]"
-      attributes
-  in Buffer.add_string state#buffer line
+and print_attributes state = function
+    None -> ()
+  | Some attr ->
+      print_ne_injection state "attributes" print_string attr

 and print_decl state = function
   TypeDecl decl -> print_type_decl state decl

@@ -607,7 +605,7 @@ and print_field_assign state {value; _} =
   print_token state equal "=";
   print_expr state field_expr

 and print_update_expr state {value; _} =
   let {record; kwd_with; updates} = value in
   print_path state record;
   print_token state kwd_with "with";

@@ -850,19 +848,23 @@ and pp_declaration state = function
     pp_fun_decl state value

 and pp_fun_decl state decl =
+  let arity =
+    match decl.attributes with
+      None -> 5
+    | Some _ -> 6 in
   let () =
-    let state = state#pad 5 0 in
+    let state = state#pad arity 0 in
     pp_ident state decl.fun_name in
   let () =
-    let state = state#pad 5 1 in
+    let state = state#pad arity 1 in
     pp_node state "<parameters>";
     pp_parameters state decl.param in
   let () =
-    let state = state#pad 5 2 in
+    let state = state#pad arity 2 in
     pp_node state "<return type>";
     pp_type_expr (state#pad 1 0) decl.ret_type in
   let () =
-    let state = state#pad 5 3 in
+    let state = state#pad arity 3 in
     pp_node state "<body>";
     let statements =
       match decl.block_with with

@@ -870,15 +872,35 @@ and pp_fun_decl state decl =
       | None -> Instr (Skip Region.ghost), [] in
     pp_statements state statements in
   let () =
-    let state = state#pad 5 4 in
+    let state = state#pad arity 4 in
     pp_node state "<return>";
-    pp_expr (state#pad 1 0) decl.return
+    pp_expr (state#pad 1 0) decl.return in
+  let () =
+    match decl.attributes with
+      None -> ()
+    | Some attr ->
+        let state = state#pad arity 5 in
+        pp_node state "<attributes>";
+        pp_attributes (state#pad 1 0) attr
   in ()

+and pp_attributes state {value; _} =
+  pp_ne_injection pp_string state value
+
 and pp_const_decl state decl =
-  pp_ident (state#pad 3 0) decl.name;
-  pp_type_expr (state#pad 3 1) decl.const_type;
-  pp_expr (state#pad 3 2) decl.init
+  let arity =
+    match decl.attributes with
+      None -> 3
+    | Some _ -> 4 in
+  pp_ident (state#pad arity 0) decl.name;
+  pp_type_expr (state#pad arity 1) decl.const_type;
+  pp_expr (state#pad arity 2) decl.init;
+  match decl.attributes with
+    None -> ()
+  | Some attr ->
+      let state = state#pad arity 3 in
+      pp_node state "<attributes>";
+      pp_attributes (state#pad 1 0) attr

 and pp_type_expr state = function
   TProd cartesian ->
and pp_type_expr state = function and pp_type_expr state = function
TProd cartesian -> TProd cartesian ->

View File

@@ -6,6 +6,7 @@ type t =
 | Duplicate_variant of AST.variable
 | Non_linear_pattern of AST.variable
 | Duplicate_field of AST.variable
+| Detached_attributes of AST.attributes

 type error = t

View File

@@ -47,9 +47,9 @@ let help language extension () =
   printf "where <input>%s is the %s source file (default: stdin),\n" extension language;
   print "and each <option> (if any) is one of the following:";
   print "  -I <paths>       Library paths (colon-separated)";
-  print "  -c, --copy       Print lexemes of tokens and markup (lexer)";
-  print "  -t, --tokens     Print tokens (lexer)";
-  print "  -u, --units      Print tokens and markup (lexer)";
+  print "  -t, --tokens     Print tokens";
+  print "  -u, --units      Print lexical units";
+  print "  -c, --copy       Print lexemes and markup";
   print "  -q, --quiet      No output, except errors (default)";
   print "      --columns    Columns for source locations";
   print "      --bytes      Bytes for source locations";

View File

@@ -77,8 +77,7 @@ module type TOKEN =
     val mk_string : lexeme -> Region.t -> token
     val mk_bytes : lexeme -> Region.t -> token
     val mk_constr : lexeme -> Region.t -> token
-    val mk_attr : lexeme -> Region.t -> (token, attr_err) result
-    val mk_attr2 : lexeme -> Region.t -> (token, attr_err) result
+    val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result
    val eof : Region.t -> token

    (* Predicates *)

View File

@@ -119,8 +119,7 @@ module type TOKEN =
     val mk_string : lexeme -> Region.t -> token
     val mk_bytes : lexeme -> Region.t -> token
     val mk_constr : lexeme -> Region.t -> token
-    val mk_attr : lexeme -> Region.t -> (token, attr_err) result
-    val mk_attr2 : lexeme -> Region.t -> (token, attr_err) result
+    val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result
     val eof : Region.t -> token

     (* Predicates *)

@@ -178,8 +177,9 @@ module type S =
     exception Error of error Region.reg

-    val format_error : ?offsets:bool -> [`Byte | `Point] ->
-      error Region.reg -> file:bool -> string
+    val format_error :
+      ?offsets:bool -> [`Byte | `Point] ->
+      error Region.reg -> file:bool -> string
   end

 (* The functorised interface

@@ -436,15 +436,15 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
       Hint: Check the LIGO syntax you use.\n"
     | Invalid_natural ->
         "Invalid natural."
     | Invalid_attribute ->
         "Invalid attribute."

     exception Error of error Region.reg

     let format_error ?(offsets=true) mode Region.{region; value} ~file =
-      let msg = error_to_string value in
-      let reg = region#to_string ~file ~offsets mode in
-      sprintf "\027[31mLexical error %s:\n%s\027[0m%!" reg msg
+      let msg = error_to_string value
+      and reg = region#to_string ~file ~offsets mode
+      in sprintf "Lexical error %s:\n%s" reg msg

     let fail region value = raise (Error Region.{region; value})

@@ -506,7 +506,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
       let num = Z.of_string (integral ^ fractional)
       and den = Z.of_string ("1" ^ String.make (len-index-1) '0')
       and million = Q.of_string "1000000" in
       let mutez = Q.make num den |> Q.mul million in
       let should_be_1 = Q.den mutez in
       if Z.equal Z.one should_be_1 then Some (Q.num mutez) else None
     | exception Not_found -> assert false

@@ -531,22 +531,14 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
         Ok token -> token, state
       | Error Token.Reserved_name -> fail region (Reserved_name lexeme)

-    let mk_attr state buffer attr =
+    let mk_attr header attr state buffer =
       let region, _, state = sync state buffer in
-      match Token.mk_attr attr region with
+      match Token.mk_attr header attr region with
         Ok token ->
           token, state
       | Error Token.Invalid_attribute ->
           fail region Invalid_attribute

-    let mk_attr2 state buffer attr =
-      let region, _, state = sync state buffer in
-      match Token.mk_attr2 attr region with
-        Ok token ->
-          token, state
-      | Error Token.Invalid_attribute ->
-          fail region Invalid_attribute
-
     let mk_constr state buffer =
       let region, lexeme, state = sync state buffer
       in Token.mk_constr lexeme region, state

@@ -560,7 +552,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
     let mk_eof state buffer =
       let region, _, state = sync state buffer
       in Token.eof region, state

 (* END HEADER *)
 }

@@ -580,6 +572,7 @@ let capital = ['A'-'Z']
 let letter = small | capital
 let ident = small (letter | '_' | digit)*
 let constr = capital (letter | '_' | digit)*
+let attr = ident | constr
 let hexa_digit = digit | ['A'-'F']
 let byte = hexa_digit hexa_digit
 let byte_seq = byte | byte (byte | '_')* byte

@@ -587,8 +580,8 @@ let bytes = "0x" (byte_seq? as seq)
 let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
         | "\\r" | "\\t" | "\\x" byte
 let pascaligo_sym = "=/=" | '#' | ":="
-let cameligo_sym = "<>" | "::" | "||" | "&&" | "[@"
-let reasonligo_sym = '!' | "=>" | "!=" | "==" | "++" | "..." | "||" | "&&" | "[@"
+let cameligo_sym = "<>" | "::" | "||" | "&&"
+let reasonligo_sym = '!' | "=>" | "!=" | "==" | "++" | "..." | "||" | "&&"

 let symbol =
   ';' | ',' | '(' | ')'| '[' | ']' | '{' | '}'

@@ -614,21 +607,24 @@ rule init state = parse
 | _ { rollback lexbuf; scan state lexbuf }

 and scan state = parse
   nl { scan (push_newline state lexbuf) lexbuf }
 | ' '+ { scan (push_space state lexbuf) lexbuf }
 | '\t'+ { scan (push_tabs state lexbuf) lexbuf }

 | ident { mk_ident state lexbuf |> enqueue }
 | constr { mk_constr state lexbuf |> enqueue }
-| bytes { (mk_bytes seq) state lexbuf |> enqueue }
+| bytes { mk_bytes seq state lexbuf |> enqueue }
 | natural 'n' { mk_nat state lexbuf |> enqueue }
 | natural "mutez" { mk_mutez state lexbuf |> enqueue }
-| natural "tz" { mk_tz state lexbuf |> enqueue }
-| decimal "tz" { mk_tz_decimal state lexbuf |> enqueue }
+| natural "tz"
+| natural "tez" { mk_tz state lexbuf |> enqueue }
+| decimal "tz"
+| decimal "tez" { mk_tz_decimal state lexbuf |> enqueue }
 | natural { mk_int state lexbuf |> enqueue }
 | symbol { mk_sym state lexbuf |> enqueue }
 | eof { mk_eof state lexbuf |> enqueue }
-| "[@" (ident|constr as attr) "]" { mk_attr state lexbuf attr |> enqueue }
-| "[@@" (ident|constr as attr) "]" { mk_attr2 state lexbuf attr |> enqueue }
+| "[@" (attr as a) "]" { mk_attr "[@" a state lexbuf |> enqueue }
+| "[@@" (attr as a) "]" { mk_attr "[@@" a state lexbuf |> enqueue }
 | '"' { let opening, _, state = sync state lexbuf in
         let thread = {opening; len=1; acc=['"']} in
         scan_string thread state lexbuf |> mk_string |> enqueue }

@@ -677,8 +673,7 @@ and scan state = parse
     and file = Filename.basename file in
     let pos = state.pos#set ~file ~line ~offset:0 in
     let state = {state with pos} in
-    scan state lexbuf
-  }
+    scan state lexbuf }
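
The decimal "tz"/"tez" rule relies on mk_tz_decimal scaling the literal to mutez and rejecting non-integral results, as in the Q computation shown above. A self-contained sketch of that arithmetic with zarith (to_mutez is our name for it):

let to_mutez ~integral ~fractional =
  (* "1.5tez": num = 15, den = 10; 15/10 * 1_000_000 = 1_500_000 mutez,
     whose denominator is 1, hence the literal is accepted. *)
  let num = Z.of_string (integral ^ fractional)
  and den = Z.of_string ("1" ^ String.make (String.length fractional) '0')
  and million = Q.of_string "1000000" in
  let mutez = Q.make num den |> Q.mul million in
  if Z.equal Z.one (Q.den mutez) then Some (Q.num mutez) else None

let () =
  match to_mutez ~integral:"1" ~fractional:"5" with
    Some z -> print_endline (Z.to_string z)  (* 1500000 *)
  | None -> prerr_endline "Invalid mutez amount."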
(* Some special errors (* Some special errors

View File

@@ -1,7 +1,5 @@
 (** Embedding the LIGO lexer in a debug module *)

-let sprintf = Printf.sprintf
-
 module type S =
   sig
     module Lexer : Lexer.S

@@ -15,12 +13,12 @@ module type S =
     val trace :
       ?offsets:bool -> [`Byte | `Point] ->
-      file_path option -> EvalOpt.command -> unit
+      file_path option -> EvalOpt.command ->
+      (unit, string) Stdlib.result
   end

 module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
   struct
     module Lexer = Lexer
     module Token = Lexer.Token

@@ -49,28 +47,29 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =

     type file_path = string

-    let trace ?(offsets=true) mode file_path_opt command : unit =
+    let trace ?(offsets=true) mode file_path_opt command :
+          (unit, string) Stdlib.result =
       try
         let Lexer.{read; buffer; close; _} =
-          Lexer.open_token_stream file_path_opt
-        and cout = stdout in
-        let log = output_token ~offsets mode command cout
-        and close_all () = close (); close_out cout in
+          Lexer.open_token_stream file_path_opt in
+        let log = output_token ~offsets mode command stdout
+        and close_all () = close (); close_out stdout in
         let rec iter () =
           match read ~log buffer with
             token ->
-              if Token.is_eof token then close_all ()
+              if Token.is_eof token
+              then Stdlib.Ok ()
               else iter ()
-          | exception Lexer.Error e ->
+          | exception Lexer.Error error ->
               let file =
                 match file_path_opt with
                   None | Some "-" -> false
                 | Some _ -> true in
               let msg =
-                Lexer.format_error ~offsets mode e ~file
-              in prerr_string msg;
-              close_all ()
-        in iter ()
-      with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg)
+                Lexer.format_error ~offsets mode ~file error
+              in Stdlib.Error msg in
+        let result = iter ()
+        in (close_all (); result)
+      with Sys_error msg -> Stdlib.Error msg
   end

View File

@@ -11,7 +11,8 @@ module type S =
     val trace :
       ?offsets:bool -> [`Byte | `Point] ->
-      file_path option -> EvalOpt.command -> unit
+      file_path option -> EvalOpt.command ->
+      (unit, string) Stdlib.result
   end

 module Make (Lexer: Lexer.S) : S with module Lexer = Lexer

View File

@@ -1,21 +1,20 @@
 (* Functor to build a standalone LIGO lexer *)

-module type S =
+module type IO =
   sig
     val ext : string              (* LIGO file extension *)
     val options : EvalOpt.options (* CLI options *)
   end

-module Make (IO: S) (Lexer: Lexer.S) =
+module Make (IO: IO) (Lexer: Lexer.S) =
   struct
     open Printf
+    module SSet = Utils.String.Set

     (* Error printing and exception tracing *)

     let () = Printexc.record_backtrace true

-    let external_ text =
-      Utils.highlight (sprintf "External error: %s" text); exit 1
-
     (* Preprocessing the input source and opening the input channels *)

     (* Path for CPP inclusions (#include) *)

@@ -29,7 +28,7 @@ module Make (IO: S) (Lexer: Lexer.S) =
     let prefix =
       match IO.options#input with
         None | Some "-" -> "temp"
       | Some file -> Filename.(file |> basename |> remove_extension)

     let suffix = ".pp" ^ IO.ext

@@ -42,24 +41,68 @@ module Make (IO: S) (Lexer: Lexer.S) =
     let cpp_cmd =
       match IO.options#input with
         None | Some "-" ->
           sprintf "cpp -traditional-cpp%s - > %s"
                   lib_path pp_input
       | Some file ->
           sprintf "cpp -traditional-cpp%s %s > %s"
                   lib_path file pp_input

-    let () =
-      if Utils.String.Set.mem "cpp" IO.options#verbose
-      then eprintf "%s\n%!" cpp_cmd;
-      if Sys.command cpp_cmd <> 0 then
-        external_ (sprintf "the command \"%s\" failed." cpp_cmd)
-
     (* Running the lexer on the input file *)

+    let scan () : (Lexer.token list, string) Stdlib.result =
+      (* Preprocessing the input *)
+      if SSet.mem "cpp" IO.options#verbose
+      then eprintf "%s\n%!" cpp_cmd
+      else ();
+      if Sys.command cpp_cmd <> 0 then
+        let msg =
+          sprintf "External error: the command \"%s\" failed." cpp_cmd
+        in Stdlib.Error msg
+      else
+        try
+          let Lexer.{read; buffer; close; _} =
+            Lexer.open_token_stream (Some pp_input) in
+          let close_all () = close (); close_out stdout in
+          let rec read_tokens tokens =
+            match read ~log:(fun _ _ -> ()) buffer with
+              token ->
+                if Lexer.Token.is_eof token
+                then Stdlib.Ok (List.rev tokens)
+                else read_tokens (token::tokens)
+            | exception Lexer.Error error ->
+                let file =
+                  match IO.options#input with
+                    None | Some "-" -> false
+                  | Some _ -> true in
+                let msg =
+                  Lexer.format_error ~offsets:IO.options#offsets
+                                     IO.options#mode ~file error
+                in Stdlib.Error msg in
+          let result = read_tokens []
+          in close_all (); result
+        with Sys_error msg -> close_out stdout; Stdlib.Error msg
+
+    (* Tracing the lexing (effectful) *)
+
     module Log = LexerLog.Make (Lexer)

-    let () = Log.trace ~offsets:IO.options#offsets
-                       IO.options#mode (Some pp_input)
-                       IO.options#cmd
+    let trace () : (unit, string) Stdlib.result =
+      (* Preprocessing the input *)
+      if SSet.mem "cpp" IO.options#verbose
+      then eprintf "%s\n%!" cpp_cmd
+      else ();
+      if Sys.command cpp_cmd <> 0 then
+        let msg =
+          sprintf "External error: the command \"%s\" failed." cpp_cmd
+        in Stdlib.Error msg
+      else
+        Log.trace ~offsets:IO.options#offsets
+                  IO.options#mode
+                  (Some pp_input)
+                  IO.options#cmd
   end
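
A minimal usage sketch of the two new entry points; the option values mirror the mock pre_options from this commit and are assumptions, as is the "input.ligo" file name:

module SSet = Utils.String.Set

module IO =
  struct
    let ext = "ligo"
    let options =
      EvalOpt.make ~libs:[] ~verbose:SSet.empty ~offsets:true
                   ~mode:`Point ~cmd:EvalOpt.Quiet ~mono:true
                   ~input:(Some "input.ligo") ~expr:false
  end

module Unit = LexerUnit.Make (IO) (Lexer.Make (LexToken))

let () =
  match Unit.scan () with
    Stdlib.Ok tokens -> Printf.printf "%d tokens\n%!" (List.length tokens)
  | Error msg -> Printf.eprintf "%s%!" msg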

View File

@@ -0,0 +1,13 @@
(* Functor to build a standalone LIGO lexer *)

module type IO =
  sig
    val ext : string              (* LIGO file extension *)
    val options : EvalOpt.options (* CLI options *)
  end

module Make (IO: IO) (Lexer: Lexer.S) :
  sig
    val scan : unit -> (Lexer.token list, string) Stdlib.result
    val trace : unit -> (unit, string) Stdlib.result
  end

View File

@@ -0,0 +1,163 @@
module Region = Simple_utils.Region

type macro = {
  origin : Region.t;  (* Not ghost   *)
  current : Region.t  (* Maybe ghost *)
}

type location =
  Loc of Region.t (* Not ghost *)
| Link of macro

(* Regions must not be ghosts and strings must not be empty. *)

type valid_lexeme = string Region.reg   (* Not ghost, not empty.    *)
type invalid_lexeme = string Region.reg (* Not ghost, empty if EOF. *)

type phase =
  Lexer
| Parser of valid_lexeme option * invalid_lexeme
| Scoping

type error = <
  location : location;
  message : string; (* Sentence ending with a period  *)
  hint : string;    (* Suggestion to solve the issue  *)
  help : string     (* Off-program help               *)
>

type invalid_error = Ghost_region

let check_loc = function
  Loc reg ->
    if reg#is_ghost then
      Stdlib.Error Ghost_region
    else Ok ()
| Link {origin; _} ->
    if origin#is_ghost then
      Stdlib.Error Ghost_region
    else Ok ()

let make_error ~location ~message ~hint ~help =
  match check_loc location with
    Stdlib.Ok () ->
      Ok (object
            method location = location
            method message = message
            method hint = hint
            method help = help
          end)
  | Error _ as e -> e

type warning = <
  location : location;
  message : string; (* Sentence ending with a period *)
  hint : string     (* Idem *)
>

type invalid_warning = invalid_error

let make_warning ~location ~message ~hint =
  match check_loc location with
    Stdlib.Ok () ->
      Ok (object
            method location = location
            method message = message
            method hint = hint
          end)
  | Error _ as e -> e

type kind =
  Error of error       (* Failure of an external invariant *)
| Internal of string   (* Failure of an internal invariant *)
| External of string   (* Failure of an external process   *)
| Warning of warning
| Info of (unit -> string) (* Log *)

type entry = <
  phase : phase;
  kind : kind
>

type invalid_entry =
  Ghost_lexeme
| Empty_lexeme

let check_phase = function
  Parser (Some valid_lexeme, invalid_lexeme) ->
    let open Region in
    if valid_lexeme.region#is_ghost
    || invalid_lexeme.region#is_ghost
    then Stdlib.Error Ghost_lexeme
    else if valid_lexeme.value = ""
    then Stdlib.Error Empty_lexeme
    else Ok ()
| Parser (None, invalid_lexeme) ->
    if invalid_lexeme.region#is_ghost
    then Stdlib.Error Ghost_lexeme
    else Ok ()
| Lexer
| Scoping -> Ok ()

let make_entry ~phase ~kind =
  match check_phase phase with
    Stdlib.Error _ as e -> e
  | Ok () -> Ok (object
                   method phase = phase
                   method kind = kind
                 end)

type memo = <
  mode : [`Byte | `Point]; (* Bytes vs UTF-8                 *)
  offsets : bool;          (* [true] for horizontal offsets  *)
  log : entry FQueue.t;
  enqueue : entry -> memo;
  dequeue : (memo * entry) option
>

type t = memo

let empty_memo ~mode ~offsets : memo =
  object
    val log = FQueue.empty
    method mode = mode
    method offsets = offsets
    method log = log
    method enqueue entry = {< log = FQueue.enq entry log >}
    method dequeue =
      match FQueue.deq log with
        None -> None
      | Some (log', entry) -> Some ({< log = log' >}, entry)
  end

let sprintf = Printf.sprintf

let string_of_entry ~(file:bool) entry : string =
  let reg = entry#region#to_string
              ~file
              ~offsets:entry#offsets
              entry#mode in
  let string =
    match entry#phase with
      Parser (None, invalid_lexeme) ->
       (match invalid_lexeme.Region.value with
          "" -> sprintf "Parse error %s" reg (* EOF *)
        | lexeme -> sprintf "Parse error %s, before \"%s\""
                            reg lexeme)
    | Parser (Some valid_lexeme, invalid_lexeme) ->
        let string =
          sprintf "Parse error %s, after \"%s\""
                  reg valid_lexeme.Region.value in
        (match invalid_lexeme.Region.value with
           "" -> string (* EOF *)
         | lexeme -> sprintf "%s and before \"%s\"" string lexeme)
    | Lexer ->
        sprintf "Lexical error %s" reg
    | Scoping ->
        sprintf "Scoping error %s" reg in
  let string =
    string
    ^ (if entry#message = "" then "."
       else ":\n" ^ entry#message) ^ "\n" in
  let string =
    string ^ (if entry#hint = "" then ""
              else sprintf "Hint: %s\n" entry#hint)
  in string
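
A small sketch of threading the functional memo, assuming FQueue dequeues in FIFO order (entries e1 and e2 are presumed built with make_entry):

let demo (e1 : entry) (e2 : entry) =
  let memo = empty_memo ~mode:`Point ~offsets:true in
  let memo = (memo#enqueue e1)#enqueue e2 in   (* log = [e1; e2] *)
  match memo#dequeue with
    Some (_rest, oldest) -> assert (oldest == e1)
  | None -> assert false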

View File

@@ -0,0 +1,120 @@
(* This module defines compilation memos. *)

(* Locations *)

module Region = Simple_utils.Region

type macro = private <
  origin : Region.t;  (* Not ghost   *)
  current : Region.t  (* Maybe ghost *)
>

type location = private
  Loc of Region.t (* Not ghost *)
| Link of macro

type invalid_loc = Ghost_region

val make_loc :
  Region.t -> (location, invalid_loc) Stdlib.result

val make_link :
  origin:Region.t ->
  current:Region.t ->
  (location, invalid_loc) Stdlib.result

type 'a located = <
  value : 'a;
  location : location
>

val make_located : value:'a -> location:location -> 'a located

(* Lexemes *)

type lexeme = string located (* Not ghost, empty => EOF *)

type window = <
  valid_lexeme : lexeme option;
  invalid_lexeme : lexeme
>

val make_window : ?valid:lexeme -> invalid:lexeme -> window

(* Compilation phases *)

type phase =
  Lexer
| Parser of window
| Scoping

(* Messages *)

type message = private string

type invalid_message = Empty_message

val make_message : string -> (message, invalid_message) Stdlib.result

val string_of_message : message -> string

(* Errors *)

type error = <
  location : location;
  message : message; (* Non-empty string (ending with a period)       *)
  hint : string;     (* Suggestion to solve the issue (may be empty)  *)
  help : string      (* Off-program help (may be empty)               *)
>

val make_error :
  location:location ->
  message:message ->
  hint:string ->
  help:string ->
  error

(* Warnings *)

type warning = <
  location : location;
  message : message; (* Non-empty string (ending with a period) *)
  hint : string      (* May be empty *)
>

val make_warning :
  location:location ->
  message:message ->
  hint:string ->
  warning

(* Kinds of entries *)

type kind =
  Error of error        (* Failure of an external invariant             *)
| Internal of message   (* Failure of an internal invariant (non-empty) *)
| External of message   (* Failure of an external process (non-empty)   *)
| Warning of warning
| Info of (unit -> message) (* Log (not-empty) *)

type entry = private <
  phase : phase;
  kind : kind
>

val make_entry : phase:phase -> kind:kind -> entry

val string_of_entry : file:bool -> entry -> string

(* Memos *)

type memo = <
  mode : [`Byte | `Point]; (* Bytes vs UTF-8                *)
  offsets : bool;          (* [true] for horizontal offsets *)
  log : entry FQueue.t;
  enqueue : entry -> memo;
  dequeue : (memo * entry) option
>

type t = memo

val empty_memo : mode:[`Byte | `Point] -> offsets:bool -> memo

View File

@@ -90,11 +90,16 @@ module Make (Lexer: Lexer.S)

     module Incr = Parser.Incremental

-    let incr_contract Lexer.{read; buffer; get_win; close; _} : Parser.ast =
+    let incr_contract memo Lexer.{read; buffer; get_win; close; _} =
       let supplier = I.lexer_lexbuf_to_supplier read buffer
       and failure = failure get_win in
       let parser = Incr.contract buffer.Lexing.lex_curr_p in
-      let ast = I.loop_handle success failure supplier parser
+      let ast =
+        try I.loop_handle success failure supplier parser with
+          Point (message, valid_opt, invalid) ->
+            let error = Memo. (* TODO *)
+            in Stdlib.Error ()
       in close (); ast

     let mono_contract = Parser.contract

@@ -117,22 +122,17 @@ module Make (Lexer: Lexer.S)
     let trailer =
       match valid_opt with
         None ->
           if Lexer.Token.is_eof invalid then ""
           else let invalid_lexeme = Lexer.Token.to_lexeme invalid in
                Printf.sprintf ", before \"%s\"" invalid_lexeme
       | Some valid ->
           let valid_lexeme = Lexer.Token.to_lexeme valid in
           let s = Printf.sprintf ", after \"%s\"" valid_lexeme in
           if Lexer.Token.is_eof invalid then s
           else
             let invalid_lexeme = Lexer.Token.to_lexeme invalid in
             Printf.sprintf "%s and before \"%s\"" s invalid_lexeme in
     let header = header ^ trailer in
     header ^ (if msg = "" then ".\n" else ":\n" ^ msg)

-    let short_error ?(offsets=true) mode msg (invalid_region: Region.t) =
-      let () = assert (not (invalid_region#is_ghost)) in
-      let header =
-        "Parse error " ^ invalid_region#to_string ~offsets mode in
-      header ^ (if msg = "" then ".\n" else ":\n" ^ msg)
   end

View File

@@ -2,6 +2,9 @@

 module Region = Simple_utils.Region

+(* The signature generated by Menhir with additional type definitions
+   for [ast] and [expr]. *)
+
 module type PARSER =
   sig
     (* The type of tokens. *)

@@ -44,30 +47,24 @@ module Make (Lexer: Lexer.S)
        (Parser: PARSER with type token = Lexer.Token.token)
        (ParErr: sig val message : int -> string end) :
   sig
-    (* Monolithic and incremental APIs of Menhir for parsing *)
+    (* The monolithic API of Menhir with memos *)

     val mono_contract :
-      (Lexing.lexbuf -> Lexer.token) -> Lexing.lexbuf -> Parser.ast
-    val incr_contract :
-      Lexer.instance -> Parser.ast
+      (Lexing.lexbuf -> Lexer.token) ->
+      Lexing.lexbuf ->
+      (Parser.ast, string) Stdlib.result
+
     val mono_expr :
-      (Lexing.lexbuf -> Lexer.token) -> Lexing.lexbuf -> Parser.expr
+      (Lexing.lexbuf -> Lexer.token) ->
+      Lexing.lexbuf ->
+      (Parser.expr, string) Stdlib.result
+
+    (* Incremental API of Menhir with memos *)
+
+    val incr_contract :
+      Lexer.instance -> (Parser.ast, string) Stdlib.result
+
     val incr_expr :
-      Lexer.instance -> Parser.expr
-
-    (* Error handling *)
-
-    type message = string
-    type valid = Parser.token
-    type invalid = Parser.token
-    type error = message * valid option * invalid
-
-    exception Point of error
-
-    val format_error :
-      ?offsets:bool -> [`Byte | `Point] -> error -> string
-
-    val short_error :
-      ?offsets:bool -> [`Byte | `Point] -> message -> Region.t -> string
+      Lexer.instance ->
+      (Parser.expr, string) Stdlib.result
   end
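
Since the parsers now return Stdlib.result instead of raising Point, callers match on the outcome rather than catching an exception. A minimal sketch against a functor instance named Front, as in this commit's PascaLIGO driver:

let run_contract lexer_inst =
  match Front.incr_contract lexer_inst with
    Stdlib.Ok ast -> Some ast
  | Error msg -> prerr_string msg; None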

View File

@ -88,7 +88,6 @@ module Make (Lexer: Lexer.S)
module Front = ParserAPI.Make (Lexer)(Parser)(ParErr) module Front = ParserAPI.Make (Lexer)(Parser)(ParErr)
let format_error = Front.format_error let format_error = Front.format_error
let short_error = Front.short_error
(* Parsing an expression *) (* Parsing an expression *)
@ -96,25 +95,28 @@ module Make (Lexer: Lexer.S)
(AST.expr, string) Stdlib.result = (AST.expr, string) Stdlib.result =
let close_all () = let close_all () =
lexer_inst.Lexer.close (); close_out stdout in lexer_inst.Lexer.close (); close_out stdout in
let lexbuf = lexer_inst.Lexer.buffer in
let expr = let expr =
if IO.options#mono then try
Front.mono_expr tokeniser lexer_inst.Lexer.buffer if IO.options#mono then
else Front.mono_expr tokeniser lexbuf
Front.incr_expr lexer_inst in else
Front.incr_expr lexer_inst
with exn -> close_all (); raise exn in
let () = let () =
if SSet.mem "ast-tokens" IO.options#verbose if SSet.mem "ast-tokens" IO.options#verbose then
then begin begin
Buffer.clear output; Buffer.clear output;
ParserLog.print_expr state expr; ParserLog.print_expr state expr;
Buffer.output_buffer stdout output Buffer.output_buffer stdout output
end in end in
let () = let () =
if SSet.mem "ast" IO.options#verbose if SSet.mem "ast" IO.options#verbose then
then begin begin
Buffer.clear output; Buffer.clear output;
ParserLog.pp_expr state expr; ParserLog.pp_expr state expr;
Buffer.output_buffer stdout output Buffer.output_buffer stdout output
end end
in close_all (); Ok expr in close_all (); Ok expr
(* Parsing a contract *) (* Parsing a contract *)
@ -123,27 +125,28 @@ module Make (Lexer: Lexer.S)
: (AST.t, string) Stdlib.result = : (AST.t, string) Stdlib.result =
let close_all () = let close_all () =
lexer_inst.Lexer.close (); close_out stdout in lexer_inst.Lexer.close (); close_out stdout in
let lexbuf = lexer_inst.Lexer.buffer in
let ast = let ast =
try try
if IO.options#mono then if IO.options#mono then
Front.mono_contract tokeniser lexer_inst.Lexer.buffer Front.mono_contract tokeniser lexbuf
else else
Front.incr_contract lexer_inst Front.incr_contract lexer_inst
with exn -> close_all (); raise exn in with exn -> close_all (); raise exn in
let () = let () =
if SSet.mem "ast" IO.options#verbose if SSet.mem "ast-tokens" IO.options#verbose then
then begin begin
Buffer.clear output; Buffer.clear output;
ParserLog.pp_ast state ast; ParserLog.print_tokens state ast;
Buffer.output_buffer stdout output Buffer.output_buffer stdout output
end in end in
let () = let () =
if SSet.mem "ast-tokens" IO.options#verbose if SSet.mem "ast" IO.options#verbose then
then begin begin
Buffer.clear output; Buffer.clear output;
ParserLog.print_tokens state ast; ParserLog.pp_ast state ast;
Buffer.output_buffer stdout output Buffer.output_buffer stdout output
end end
in close_all (); Ok ast in close_all (); Ok ast
(* Wrapper for the parsers above *) (* Wrapper for the parsers above *)
@@ -157,7 +160,7 @@ module Make (Lexer: Lexer.S)
if Sys.command cpp_cmd <> 0 then if Sys.command cpp_cmd <> 0 then
let msg = let msg =
sprintf "External error: the command \"%s\" failed." cpp_cmd sprintf "External error: \"%s\" failed." cpp_cmd
in Stdlib.Error msg in Stdlib.Error msg
else else
(* Instantiating the lexer *) (* Instantiating the lexer *)
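
The external preprocessor guard above follows the same result-returning convention. The same check in isolation, as a sketch (the name run_cpp is hypothetical):

  let run_cpp cpp_cmd =
    if Sys.command cpp_cmd <> 0 then
      Stdlib.Error (Printf.sprintf "External error: \"%s\" failed." cpp_cmd)
    else Stdlib.Ok ()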

View File

@@ -46,9 +46,6 @@ module Make (Lexer: Lexer.S)
val format_error : val format_error :
?offsets:bool -> [`Byte | `Point] -> error -> string ?offsets:bool -> [`Byte | `Point] -> error -> string
val short_error :
?offsets:bool -> [`Byte | `Point] -> message -> Region.t -> string
(* Parsers *) (* Parsers *)
val parse : val parse :

View File

@@ -23,9 +23,10 @@ type t = <
is_ghost : bool; is_ghost : bool;
to_string : ?offsets:bool -> [`Byte | `Point] -> string; to_string :
compact : ?offsets:bool -> [`Byte | `Point] -> string; ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string compact :
?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
> >
type pos = t type pos = t
@@ -55,14 +56,15 @@ let make ~byte ~point_num ~point_bol =
method set_offset offset = method set_offset offset =
{< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >} {< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
method set ~file ~line ~offset = method set ?file ~line ~offset =
let pos = self#set_file file in let pos =
match file with
None -> self
| Some name -> self#set_file name in
let pos = pos#set_line line in let pos = pos#set_line line in
let pos = pos#set_offset offset let pos = pos#set_offset offset
in pos in pos
(* The string must not contain '\n'. See [new_line]. *)
method shift_bytes len = method shift_bytes len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len}; {< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + len >} point_num = point_num + len >}
@@ -77,11 +79,13 @@ let make ~byte ~point_num ~point_bol =
pos_bol = byte.pos_cnum}; pos_bol = byte.pos_cnum};
point_bol = point_num >} point_bol = point_num >}
(* The string must not contain '\n'. See [add_nl]. *)
method new_line string = method new_line string =
let len = String.length string let len = String.length string
in (self#shift_bytes len)#add_nl in (self#shift_bytes len)#add_nl
method is_ghost = byte = Lexing.dummy_pos method is_ghost = (byte = Lexing.dummy_pos)
method file = byte.Lexing.pos_fname method file = byte.Lexing.pos_fname
@@ -99,24 +103,30 @@ let make ~byte ~point_num ~point_bol =
method byte_offset = byte.Lexing.pos_cnum method byte_offset = byte.Lexing.pos_cnum
method to_string ?(offsets=true) mode = method to_string ?(file=true) ?(offsets=true) mode =
let offset = self#offset mode in
let horizontal, value =
if offsets then "character", offset else "column", offset + 1
in sprintf "File \"%s\", line %i, %s %i"
self#file self#line horizontal value
method compact ?(offsets=true) mode =
if self#is_ghost then "ghost" if self#is_ghost then "ghost"
else else
let offset = self#offset mode in let offset = self#offset mode in
sprintf "%s:%i:%i" let horizontal, value =
self#file self#line (if offsets then offset else offset + 1) if offsets then
"character", offset
else "column", offset + 1 in
if file && self#file <> "" then
sprintf "File \"%s\", line %i, %s %i"
self#file self#line horizontal value
else sprintf "Line %i, %s %i"
self#line horizontal value
method anonymous ?(offsets=true) mode = method compact ?(file=true) ?(offsets=true) mode =
if self#is_ghost then "ghost" if self#is_ghost then "ghost"
else sprintf "%i:%i" self#line else
(if offsets then self#offset mode else self#column mode) let horizontal =
if offsets then self#offset mode
else self#column mode in
if file && self#file <> "" then
sprintf "%s:%i:%i" self#file self#line horizontal
else
sprintf "%i:%i" self#line horizontal
end end
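
Inferred behaviour of the two printers above, for a non-ghost position on line 2 with offset 4 (column 5) in file "a.ligo"; the outputs are read off the sprintf formats, not taken from a run:

  (* pos#to_string `Point                            => File "a.ligo", line 2, character 4
     pos#to_string ~file:false ~offsets:false `Point => Line 2, column 5
     pos#compact `Point                              => a.ligo:2:4
     pos#compact ~file:false `Point                  => 2:4 *)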
let from_byte byte = let from_byte byte =
@@ -126,7 +136,9 @@ let from_byte byte =
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1) let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1)
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1) let min ~file =
let pos = make ~byte:Lexing.dummy_pos ~point_num:0 ~point_bol:0
in pos#set_file file
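
Since min is now parameterised by the file name (labelled ~file, matching the interface), building the least position of a compilation unit reads as in this sketch (the file name is hypothetical):

  let start_of_unit = Pos.min ~file:"a.ligo"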
(* Comparisons *) (* Comparisons *)

View File

@@ -58,23 +58,36 @@
{li The call [pos#byte_offset] is the offset of the position {li The call [pos#byte_offset] is the offset of the position
[pos] since the beginning of the file, counted in bytes.}} [pos] since the beginning of the file, counted in bytes.}}
*) *)
type t = <
type invalid_pos = [
`Invalid_line
| `Invalid_offset
]
type invalid_line = [`Invalid_line]
type invalid_offset = [`Invalid_offset]
type invalid_nl = [`Invalid_newline]
type t = private <
(* Payload *) (* Payload *)
byte : Lexing.position; byte : Lexing.position;
point_num : int; point_num : int; (* point_num >= point_bol *)
point_bol : int; point_bol : int; (* point_bol >= 0 *)
file : string; file : string; (* May be empty *)
line : int; line : int; (* line > 0 *)
(* Setters *) (* Setters *)
set_file : string -> t; set_file : string -> t;
set_line : int -> t; set_line : int -> (t, invalid_line) Stdlib.result;
set_offset : int -> t; set_offset : int -> (t, invalid_offset) Stdlib.result;
set : file:string -> line:int -> offset:int -> t;
new_line : string -> t; set : ?file:string -> line:int -> offset:int ->
(t, invalid_pos) Stdlib.result;
(* The string must be "\n" or "\r\n". *)
new_line : string -> (t, invalid_nl) Stdlib.result;
add_nl : t; add_nl : t;
shift_bytes : int -> t; shift_bytes : int -> t;
@@ -93,9 +106,10 @@ type t = <
(* Conversions to [string] *) (* Conversions to [string] *)
to_string : ?offsets:bool -> [`Byte | `Point] -> string; to_string :
compact : ?offsets:bool -> [`Byte | `Point] -> string; ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string compact :
?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
> >
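
A sketch of client code against the failable setters above; returning the original position on error is illustrative only:

  let move_to ~line ~offset (pos : Pos.t) =
    match pos#set ~line ~offset with
      Stdlib.Ok pos' -> pos'
    | Stdlib.Error (`Invalid_line | `Invalid_offset) -> pos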
(** A shorthand after an [open Pos]. (** A shorthand after an [open Pos].
@@ -104,18 +118,22 @@ type pos = t
(** {1 Constructors} *) (** {1 Constructors} *)
val make : byte:Lexing.position -> point_num:int -> point_bol:int -> t val make :
val from_byte : Lexing.position -> t byte:Lexing.position -> point_num:int -> point_bol:int ->
(t, invalid_pos) Stdlib.result
val from_byte :
Lexing.position -> (t, invalid_pos) Stdlib.result
(** {1 Special positions} *) (** {1 Special positions} *)
(** The value [ghost] is the same as {! Lexing.dummy_pos}. (** The value [ghost] is based on {! Lexing.dummy_pos}.
*) *)
val ghost : t val ghost : t
(** Lexing convention: line [1], offsets to [0] and file to [""]. (** Lexing convention: line [1], offset to [0].
*) *)
val min : t val min : file:string -> t
(** {1 Comparisons} *) (** {1 Comparisons} *)

View File

@@ -101,8 +101,8 @@ let make ~(start: Pos.t) ~(stop: Pos.t) =
if start#is_ghost || stop#is_ghost then "ghost" if start#is_ghost || stop#is_ghost then "ghost"
else else
let prefix = if file then start#file ^ ":" else "" let prefix = if file then start#file ^ ":" else ""
and start_str = start#anonymous ~offsets mode and start_str = start#compact ~file:false ~offsets mode
and stop_str = stop#anonymous ~offsets mode in and stop_str = stop#compact ~file:false ~offsets mode in
if start#file = stop#file then if start#file = stop#file then
if start#line = stop#line then if start#line = stop#line then
sprintf "%s%s-%i" prefix start_str sprintf "%s%s-%i" prefix start_str
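
Inferred output of this printer for a region of "a.ligo" spanning offsets 4 to 9 on line 2 (read off the sprintf formats above, not taken from a run):

  (* region#compact `Point             => a.ligo:2:4-9
     region#compact ~file:false `Point => 2:4-9 *)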

View File

@@ -54,9 +54,9 @@
{li The method [compact] has the same signature and calling {li The method [compact] has the same signature and calling
convention as [to_string], except that the resulting string convention as [to_string], except that the resulting string
is more compact.}} is shorter (usually for debugging or tracing).}}
*) *)
type t = < type t = private <
start : Pos.t; start : Pos.t;
stop : Pos.t; stop : Pos.t;
@@ -86,9 +86,9 @@ type t = <
*) *)
type region = t type region = t
(** The type ['a reg] enables the concept of something of type ['a] to (** The type ['a reg] relates a value of type ['a]
be related to a region in a source file. to a region in a source file.
*) *)
type 'a reg = {region: t; value: 'a} type 'a reg = {region: t; value: 'a}
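
A sketch of decorating a value with its source region; the payload is arbitrary, and a ghost region is assumed to be exposed by this module as one is by [Pos]:

  let kwd_function : string reg = {region = ghost; value = "function"}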
(* {1 Constructors} *) (* {1 Constructors} *)