From f795f1216a67b145e088e6edc6d304817eb3d29c Mon Sep 17 00:00:00 2001
From: Christian Rinderknecht
Date: Sat, 12 Oct 2019 23:42:26 +0200
Subject: [PATCH] Bug fixing in the lexers and the parser. Started the AST pretty-printer.

LexToken, AST: Tiny refactoring.

Bug: Added the making of the AST node PBytes.

Parser: The rule "pattern" was not properly stratified: the constructor
"PCons" was always produced, even when no consing was done. There is now
a fall-through to "core_pattern".

Bug: When sharing the lexers between Ligodity and Pascaligo, a
regression was introduced in the lexing of symbols. Indeed, symbols
specific to Ligodity (like "<>") and to Pascaligo (like "=/=") were
scanned, but each "LexToken.mk_sym" accepted only the symbols of its own
syntax, which caused an assertion to fail.

Fix: I added an error "sym_err" to handle that situation gracefully and
to give the programmer a hint (namely, to check which LIGO syntax is in
use).

WIP: Started writing pretty-printing functions for the nodes of the AST.

CLI: The option "--verbose=ast" now calls those functions instead of
printing the tokens from the AST. When the pretty-printer is finished,
the option for printing the tokens will likely become
"--verbose=ast-tokens".
---
 src/passes/1-parser/ligodity/LexToken.mli | 97 ++--
 src/passes/1-parser/ligodity/LexToken.mll | 83 ++--
 src/passes/1-parser/pascaligo/AST.ml | 4 +-
 src/passes/1-parser/pascaligo/AST.mli | 4 +-
 src/passes/1-parser/pascaligo/LexToken.mli | 19 +-
 src/passes/1-parser/pascaligo/LexToken.mll | 67 +--
 src/passes/1-parser/pascaligo/Parser.mly | 9 +-
 src/passes/1-parser/pascaligo/ParserLog.ml | 514 ++++++++++++++++++++
 src/passes/1-parser/pascaligo/ParserLog.mli | 2 +
 src/passes/1-parser/pascaligo/ParserMain.ml | 3 +-
 src/passes/1-parser/shared/Lexer.mli | 18 +-
 src/passes/1-parser/shared/Lexer.mll | 45 +-
 12 files changed, 699 insertions(+), 166 deletions(-)

diff --git a/src/passes/1-parser/ligodity/LexToken.mli b/src/passes/1-parser/ligodity/LexToken.mli
index a30c41714..ea4f0a6ad 100644
--- a/src/passes/1-parser/ligodity/LexToken.mli
+++ b/src/passes/1-parser/ligodity/LexToken.mli
@@ -31,50 +31,50 @@ type lexeme = string
type t =
  (* Symbols *)

- ARROW of Region.t (* "->" *)
-| CONS of Region.t (* "::" *)
-| CAT of Region.t (* "^" *)
- (*| APPEND (* "@" *)*)
+ ARROW of Region.t (* "->" *)
+| CONS of Region.t (* "::" *)
+| CAT of Region.t (* "^" *)
+(*| APPEND (* "@" *)*)

(* Arithmetics *)

| MINUS of Region.t (* "-" *)
-| PLUS of Region.t (* "+" *)
| SLASH of Region.t (* "/" *)
-| TIMES of Region.t (* "*" *)
+| PLUS of Region.t (* "+" *)
+| TIMES of Region.t (* "*" *)

(* Compounds *)

-| LPAR of Region.t (* "(" *)
-| RPAR of Region.t (* ")" *)
-| LBRACKET of Region.t (* "[" *)
-| RBRACKET of Region.t (* "]" *)
-| LBRACE of Region.t (* "{" *)
-| RBRACE of Region.t (* "}" *)
+| LPAR of Region.t (* "(" *)
+| RPAR of Region.t (* ")" *)
+| LBRACKET of Region.t (* "[" *)
+| RBRACKET of Region.t (* "]" *)
+| LBRACE of Region.t (* "{" *)
+| RBRACE of Region.t (* "}" *)

(* Separators *)

-| COMMA of Region.t (* "," *)
-| SEMI of Region.t (* ";" *)
-| VBAR of Region.t (* "|" *)
-| COLON of Region.t (* ":" *)
-| DOT of Region.t (* "." *)
+| COMMA of Region.t (* "," *)
+| SEMI of Region.t (* ";" *)
+| VBAR of Region.t (* "|" *)
+| COLON of Region.t (* ":" *)
+| DOT of Region.t (* "."
*) (* Wildcard *) -| WILD of Region.t (* "_" *) +| WILD of Region.t (* "_" *) (* Comparisons *) | EQ of Region.t (* "=" *) -| NE of Region.t (* "<>" *) -| LT of Region.t (* "<" *) -| GT of Region.t (* ">" *) +| NE of Region.t (* "<>" *) +| LT of Region.t (* "<" *) +| GT of Region.t (* ">" *) | LE of Region.t (* "=<" *) -| GE of Region.t (* ">=" *) +| GE of Region.t (* ">=" *) -| BOOL_OR of Region.t (* "||" *) -| BOOL_AND of Region.t(* "&&" *) +| BOOL_OR of Region.t (* "||" *) +| BOOL_AND of Region.t (* "&&" *) (* Identifiers, labels, numbers and strings *) @@ -90,24 +90,24 @@ type t = (*| And*) | Begin of Region.t -| Else of Region.t -| End of Region.t +| Else of Region.t +| End of Region.t | False of Region.t -| Fun of Region.t -| If of Region.t -| In of Region.t -| Let of Region.t +| Fun of Region.t +| If of Region.t +| In of Region.t +| Let of Region.t | Match of Region.t -| Mod of Region.t -| Not of Region.t -| Of of Region.t -| Or of Region.t -| Then of Region.t -| True of Region.t -| Type of Region.t -| With of Region.t +| Mod of Region.t +| Not of Region.t +| Of of Region.t +| Or of Region.t +| Then of Region.t +| True of Region.t +| Type of Region.t +| With of Region.t - (* Liquidity specific *) + (* Liquidity-specific *) | LetEntry of Region.t | MatchNat of Region.t @@ -137,23 +137,20 @@ val to_region : token -> Region.t (* Injections *) -type int_err = - Non_canonical_zero - +type int_err = Non_canonical_zero type ident_err = Reserved_name +type nat_err = Invalid_natural + | Non_canonical_zero_nat +type sym_err = Invalid_symbol -type invalid_natural = - | Invalid_natural - | Non_canonical_zero_nat - -val mk_string : lexeme -> Region.t -> token -val mk_bytes : lexeme -> Region.t -> token val mk_int : lexeme -> Region.t -> (token, int_err) result -val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result +val mk_nat : lexeme -> Region.t -> (token, nat_err) result val mk_mtz : lexeme -> Region.t -> (token, int_err) result val mk_ident : lexeme -> Region.t -> (token, ident_err) result +val mk_sym : lexeme -> Region.t -> (token, sym_err) result +val mk_string : lexeme -> Region.t -> token +val mk_bytes : lexeme -> Region.t -> token val mk_constr : lexeme -> Region.t -> token -val mk_sym : lexeme -> Region.t -> token val eof : Region.t -> token (* Predicates *) diff --git a/src/passes/1-parser/ligodity/LexToken.mll b/src/passes/1-parser/ligodity/LexToken.mll index 74c32cd1a..dd70fd58c 100644 --- a/src/passes/1-parser/ligodity/LexToken.mll +++ b/src/passes/1-parser/ligodity/LexToken.mll @@ -200,8 +200,8 @@ let to_lexeme = function | BOOL_AND _ -> "&&" | Ident id -> id.Region.value | Constr id -> id.Region.value - | Int i - | Nat i + | Int i + | Nat i | Mtz i -> fst i.Region.value | Str s -> s.Region.value | Bytes b -> fst b.Region.value @@ -264,7 +264,7 @@ let keywords = [ let reserved = let open SSet in - empty + empty |> add "and" |> add "as" |> add "asr" @@ -284,7 +284,7 @@ let reserved = |> add "lazy" |> add "lor" |> add "lsl" - |> add "lsr" + |> add "lsr" |> add "lxor" |> add "method" |> add "module" @@ -306,7 +306,7 @@ let reserved = let constructors = [ (fun reg -> False reg); - (fun reg -> True reg); + (fun reg -> True reg); ] let add map (key, value) = SMap.add key value map @@ -379,15 +379,14 @@ let mk_int lexeme region = then Error Non_canonical_zero else Ok (Int Region.{region; value = lexeme, z}) -type invalid_natural = - | Invalid_natural - | Non_canonical_zero_nat - +type nat_err = + Invalid_natural +| Non_canonical_zero_nat let mk_nat lexeme region = - match 
(String.index_opt lexeme 'p') with
  | None -> Error Invalid_natural
  | Some _ -> (
    let z =
      Str.(global_replace (regexp "_") "" lexeme) |>
      Str.(global_replace (regexp "p") "") |>
@@ -408,35 +407,41 @@ let mk_mtz lexeme region =
let eof region = EOF region

+type sym_err = Invalid_symbol
+
let mk_sym lexeme region =
match lexeme with
- "->" -> ARROW region
- | "::" -> CONS region
- | "^" -> CAT region
- | "-" -> MINUS region
- | "+" -> PLUS region
- | "/" -> SLASH region
- | "*" -> TIMES region
- | "[" -> LBRACKET region
- | "]" -> RBRACKET region
- | "{" -> LBRACE region
- | "}" -> RBRACE region
- | "," -> COMMA region
- | ";" -> SEMI region
- | "|" -> VBAR region
- | ":" -> COLON region
- | "." -> DOT region
- | "_" -> WILD region
- | "=" -> EQ region
- | "<>" -> NE region
- | "<" -> LT region
- | ">" -> GT region
- | "=<" -> LE region
- | ">=" -> GE region
- | "||" -> BOOL_OR region
- | "&&" -> BOOL_AND region
- | "(" -> LPAR region
- | ")" -> RPAR region
+ (* Lexemes in common with all concrete syntaxes *)
";" -> Ok (SEMI region)
| "," -> Ok (COMMA region)
| "(" -> Ok (LPAR region)
| ")" -> Ok (RPAR region)
| "[" -> Ok (LBRACKET region)
| "]" -> Ok (RBRACKET region)
| "{" -> Ok (LBRACE region)
| "}" -> Ok (RBRACE region)
| "=" -> Ok (EQ region)
| ":" -> Ok (COLON region)
| "|" -> Ok (VBAR region)
| "->" -> Ok (ARROW region)
| "." -> Ok (DOT region)
| "_" -> Ok (WILD region)
| "^" -> Ok (CAT region)
| "+" -> Ok (PLUS region)
| "-" -> Ok (MINUS region)
| "*" -> Ok (TIMES region)
| "/" -> Ok (SLASH region)
| "<" -> Ok (LT region)
| "<=" -> Ok (LE region)
| ">" -> Ok (GT region)
| ">=" -> Ok (GE region)
+ (* Lexemes specific to Ligodity *)
| "<>" -> Ok (NE region)
| "::" -> Ok (CONS region)
| "||" -> Ok (BOOL_OR region)
| "&&" -> Ok (BOOL_AND region)
+ (* Invalid lexemes *)
| _ -> Error Invalid_symbol

(* Identifiers *)
@@ -533,4 +538,4 @@ let is_sym = function
let is_eof = function EOF _ -> true | _ -> false

(* END TRAILER *)
-} \ No newline at end of file
+}
diff --git a/src/passes/1-parser/pascaligo/AST.ml b/src/passes/1-parser/pascaligo/AST.ml
index 44c6c0734..cf0cb2014 100644
--- a/src/passes/1-parser/pascaligo/AST.ml
+++ b/src/passes/1-parser/pascaligo/AST.ml
@@ -188,7 +188,7 @@ and type_decl = {
and type_expr =
  TProd of cartesian
| TSum of (variant reg, vbar) nsepseq reg
-| TRecord of record_type
+| TRecord of field_decl reg injection reg
| TApp of (type_name * type_tuple) reg
| TFun of (type_expr * arrow * type_expr) reg
| TPar of type_expr par reg
@@ -201,8 +201,6 @@ and variant = {
  args : (kwd_of * cartesian) option
}

-and record_type = field_decl reg injection reg
-
and field_decl = {
  field_name : field_name;
  colon : colon;
diff --git a/src/passes/1-parser/pascaligo/AST.mli b/src/passes/1-parser/pascaligo/AST.mli
index 4984830e0..ee4d1982c 100644
--- a/src/passes/1-parser/pascaligo/AST.mli
+++ b/src/passes/1-parser/pascaligo/AST.mli
@@ -172,7 +172,7 @@ and type_decl = {
and type_expr =
  TProd of cartesian
| TSum of (variant reg, vbar) nsepseq reg
-| TRecord of record_type
+| TRecord of field_decl reg injection reg
| TApp of (type_name * type_tuple) reg
| TFun of (type_expr * arrow * type_expr) reg
| TPar of type_expr par reg
@@ -185,8 +185,6 @@ and variant = {
  args : (kwd_of * cartesian) option
}

-and record_type = field_decl reg injection reg
-
and field_decl = {
  field_name : field_name;
  colon : colon;
diff --git a/src/passes/1-parser/pascaligo/LexToken.mli b/src/passes/1-parser/pascaligo/LexToken.mli
index 49998a2e1..07138aa3f 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mli +++ b/src/passes/1-parser/pascaligo/LexToken.mli @@ -137,23 +137,20 @@ val to_region : token -> Region.t (* Injections *) -type int_err = - Non_canonical_zero - +type int_err = Non_canonical_zero type ident_err = Reserved_name +type nat_err = Invalid_natural + | Non_canonical_zero_nat +type sym_err = Invalid_symbol -type invalid_natural = - | Invalid_natural - | Non_canonical_zero_nat - -val mk_string : lexeme -> Region.t -> token -val mk_bytes : lexeme -> Region.t -> token val mk_int : lexeme -> Region.t -> (token, int_err) result -val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result +val mk_nat : lexeme -> Region.t -> (token, nat_err) result val mk_mtz : lexeme -> Region.t -> (token, int_err) result val mk_ident : lexeme -> Region.t -> (token, ident_err) result +val mk_sym : lexeme -> Region.t -> (token, sym_err) result +val mk_string : lexeme -> Region.t -> token +val mk_bytes : lexeme -> Region.t -> token val mk_constr : lexeme -> Region.t -> token -val mk_sym : lexeme -> Region.t -> token val eof : Region.t -> token (* Predicates *) diff --git a/src/passes/1-parser/pascaligo/LexToken.mll b/src/passes/1-parser/pascaligo/LexToken.mll index b92ae7edd..f0bd96bc8 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mll +++ b/src/passes/1-parser/pascaligo/LexToken.mll @@ -480,9 +480,9 @@ let mk_int lexeme region = then Error Non_canonical_zero else Ok (Int Region.{region; value = lexeme, z}) -type invalid_natural = - | Invalid_natural - | Non_canonical_zero_nat +type nat_err = + Invalid_natural +| Non_canonical_zero_nat let mk_nat lexeme region = match (String.index_opt lexeme 'n') with @@ -508,35 +508,42 @@ let mk_mtz lexeme region = let eof region = EOF region +type sym_err = Invalid_symbol + let mk_sym lexeme region = match lexeme with - ";" -> SEMI region - | "," -> COMMA region - | "(" -> LPAR region - | ")" -> RPAR region - | "{" -> LBRACE region - | "}" -> RBRACE region - | "[" -> LBRACKET region - | "]" -> RBRACKET region - | "#" -> CONS region - | "|" -> VBAR region - | "->" -> ARROW region - | ":=" -> ASS region - | "=" -> EQUAL region - | ":" -> COLON region - | "<" -> LT region - | "<=" -> LEQ region - | ">" -> GT region - | ">=" -> GEQ region - | "=/=" -> NEQ region - | "+" -> PLUS region - | "-" -> MINUS region - | "/" -> SLASH region - | "*" -> TIMES region - | "." -> DOT region - | "_" -> WILD region - | "^" -> CAT region - | _ -> assert false + (* Lexemes in common with all concrete syntaxes *) + ";" -> Ok (SEMI region) + | "," -> Ok (COMMA region) + | "(" -> Ok (LPAR region) + | ")" -> Ok (RPAR region) + | "[" -> Ok (LBRACKET region) + | "]" -> Ok (RBRACKET region) + | "{" -> Ok (LBRACE region) + | "}" -> Ok (RBRACE region) + | "=" -> Ok (EQUAL region) + | ":" -> Ok (COLON region) + | "|" -> Ok (VBAR region) + | "->" -> Ok (ARROW region) + | "." 
-> Ok (DOT region) + | "_" -> Ok (WILD region) + | "^" -> Ok (CAT region) + | "+" -> Ok (PLUS region) + | "-" -> Ok (MINUS region) + | "*" -> Ok (TIMES region) + | "/" -> Ok (SLASH region) + | "<" -> Ok (LT region) + | "<=" -> Ok (LEQ region) + | ">" -> Ok (GT region) + | ">=" -> Ok (GEQ region) + + (* Lexemes specific to PascaLIGO *) + | "=/=" -> Ok (NEQ region) + | "#" -> Ok (CONS region) + | ":=" -> Ok (ASS region) + + (* Invalid lexemes *) + | _ -> Error Invalid_symbol (* Identifiers *) diff --git a/src/passes/1-parser/pascaligo/Parser.mly b/src/passes/1-parser/pascaligo/Parser.mly index 55729ed77..f69822446 100644 --- a/src/passes/1-parser/pascaligo/Parser.mly +++ b/src/passes/1-parser/pascaligo/Parser.mly @@ -935,14 +935,17 @@ list_expr: (* Patterns *) pattern: - nsepseq(core_pattern,CONS) { - let region = nsepseq_to_region pattern_to_region $1 - in PCons {region; value=$1}} + core_pattern CONS nsepseq(core_pattern,CONS) { + let value = Utils.nsepseq_cons $1 $2 $3 in + let region = nsepseq_to_region pattern_to_region value + in PCons {region; value}} +| core_pattern { $1 } core_pattern: var { PVar $1 } | WILD { PWild $1 } | Int { PInt $1 } +| Bytes { PBytes $1 } | String { PString $1 } | C_Unit { PUnit $1 } | C_False { PFalse $1 } diff --git a/src/passes/1-parser/pascaligo/ParserLog.ml b/src/passes/1-parser/pascaligo/ParserLog.ml index 6cf9ccc3e..32aa4fcff 100644 --- a/src/passes/1-parser/pascaligo/ParserLog.ml +++ b/src/passes/1-parser/pascaligo/ParserLog.ml @@ -740,3 +740,517 @@ let tokens_to_string = to_string print_tokens let path_to_string = to_string print_path let pattern_to_string = to_string print_pattern let instruction_to_string = to_string print_instruction + +(* Pretty-printing the AST *) + +let mk_pad len rank pc = + pc ^ (if rank = len-1 then "`-- " else "|-- "), + pc ^ (if rank = len-1 then " " else "| ") + +let rec pp_ast buffer ~pad:(pd,pc) {decl; _} = + let node = sprintf "%s\n" pd in + let () = Buffer.add_string buffer node in + let apply len rank = + let pad = mk_pad len rank pc in + pp_declaration buffer ~pad in + let decls = Utils.nseq_to_list decl + in List.iteri (List.length decls |> apply) decls + +and pp_ident buffer ~pad:(pd,_) name = + let node = sprintf "%s%s\n" pd name + in Buffer.add_string buffer node + +and pp_string buffer = pp_ident buffer + +and pp_declaration buffer ~pad:(pd,pc) = function + TypeDecl {value; _} -> + let node = sprintf "%sTypeDecl\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 2 0 pc) value.name.value; + pp_type_expr buffer ~pad:(mk_pad 2 1 pc) value.type_expr +| ConstDecl {value; _} -> + let node = sprintf "%sConstDecl\n" pd in + Buffer.add_string buffer node; + pp_const_decl buffer ~pad:(mk_pad 1 0 pc) value +| LambdaDecl lamb -> + let node = sprintf "%sLambdaDecl\n" pd in + Buffer.add_string buffer node; + pp_lambda_decl buffer ~pad:(mk_pad 1 0 pc) lamb + +and pp_const_decl buffer ~pad:(_,pc) decl = + pp_ident buffer ~pad:(mk_pad 3 0 pc) decl.name.value; + pp_type_expr buffer ~pad:(mk_pad 3 1 pc) decl.const_type; + pp_expr buffer ~pad:(mk_pad 3 2 pc) decl.init + +and pp_type_expr buffer ~pad:(pd,pc as pad) = function + TProd cartesian -> + let node = sprintf "%sTProd\n" pd in + Buffer.add_string buffer node; + pp_cartesian buffer ~pad cartesian +| TAlias {value; _} -> + let node = sprintf "%sTAlias\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 1 0 pc) value +| TPar {value; _} -> + let node = sprintf "%sTPar\n" pd in + Buffer.add_string buffer node; + pp_type_expr buffer 
~pad:(mk_pad 1 0 pc) value.inside +| TApp {value=name,tuple; _} -> + let node = sprintf "%sTApp\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 1 0 pc) name.value; + pp_type_tuple buffer ~pad:(mk_pad 2 1 pc) tuple +| TFun {value; _} -> + let node = sprintf "%sTFun\n" pd in + let () = Buffer.add_string buffer node in + let apply len rank = + let pad = mk_pad len rank pc in + pp_type_expr buffer ~pad in + let domain, _, range = value in + List.iteri (apply 2) [domain; range] +| TSum {value; _} -> + let node = sprintf "%sTSum\n" pd in + let () = Buffer.add_string buffer node in + let apply len rank variant = + let pad = mk_pad len rank pc in + pp_variant buffer ~pad variant.value in + let variants = Utils.nsepseq_to_list value in + List.iteri (List.length variants |> apply) variants +| TRecord {value; _} -> + let node = sprintf "%sTRecord\n" pd in + let () = Buffer.add_string buffer node in + let apply len rank field_decl = + pp_field_decl buffer ~pad:(mk_pad len rank pc) + field_decl.value in + let fields = Utils.sepseq_to_list value.elements in + List.iteri (List.length fields |> apply) fields + +and pp_cartesian buffer ~pad:(_,pc) {value; _} = + let apply len rank = + pp_type_expr buffer ~pad:(mk_pad len rank pc) in + let components = Utils.nsepseq_to_list value + in List.iteri (List.length components |> apply) components + +and pp_variant buffer ~pad:(pd,_ as pad) {constr; args} = + let node = sprintf "%s%s\n" pd constr.value in + Buffer.add_string buffer node; + match args with + None -> () + | Some (_,c) -> pp_cartesian buffer ~pad c + +and pp_field_decl buffer ~pad:(pd,pc) decl = + let node = sprintf "%s%s\n" pd decl.field_name.value in + Buffer.add_string buffer node; + pp_type_expr buffer ~pad:(mk_pad 1 0 pc) decl.field_type + +and pp_type_tuple buffer ~pad:(_,pc) {value; _} = + let components = Utils.nsepseq_to_list value.inside in + let apply len rank = + pp_type_expr buffer ~pad:(mk_pad len rank pc) + in List.iteri (List.length components |> apply) components + +and pp_lambda_decl buffer ~pad = function + FunDecl {value; _} -> + let node = sprintf "%sFunDecl\n" (fst pad) in + Buffer.add_string buffer node; + pp_fun_decl buffer ~pad value +| ProcDecl {value; _} -> + let node = sprintf "%sProcDecl\n" (fst pad) in + Buffer.add_string buffer node; + pp_proc_decl buffer ~pad value + +and pp_fun_decl buffer ~pad:(_,pc) decl = + let () = + let pad = mk_pad 6 0 pc in + pp_ident buffer ~pad decl.name.value in + let () = + let pd, _ as pad = mk_pad 6 1 pc in + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_parameters buffer ~pad decl.param in + let () = + let pd, pc = mk_pad 6 2 pc in + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_type_expr buffer ~pad:(mk_pad 1 0 pc) decl.ret_type in + let () = + let pd, _ as pad = mk_pad 6 3 pc in + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_local_decls buffer ~pad decl.local_decls in + let () = + let pd, _ as pad = mk_pad 6 4 pc in + let node = sprintf "%s\n" pd in + let statements = decl.block.value.statements in + Buffer.add_string buffer node; + pp_statements buffer ~pad statements in + let () = + let pd, pc = mk_pad 6 5 pc in + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_expr buffer ~pad:(mk_pad 1 0 pc) decl.return + in () + +and pp_parameters buffer ~pad:(_,pc) {value; _} = + let params = Utils.nsepseq_to_list value.inside in + let arity = List.length params in + let apply len rank = + pp_param_decl buffer ~pad:(mk_pad len 
rank pc) + in List.iteri (apply arity) params + +and pp_param_decl buffer ~pad:(pd,pc) = function + ParamConst {value; _} -> + let node = sprintf "%sParamConst\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 2 0 pc) value.var.value; + pp_type_expr buffer ~pad:(mk_pad 2 1 pc) value.param_type +| ParamVar {value; _} -> + let node = sprintf "%sParamVar\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 2 0 pc) value.var.value; + pp_type_expr buffer ~pad:(mk_pad 2 1 pc) value.param_type + +and pp_statements buffer ~pad:(_,pc) statements = + let statements = Utils.nsepseq_to_list statements in + let length = List.length statements in + let apply len rank = + pp_statement buffer ~pad:(mk_pad len rank pc) + in List.iteri (apply length) statements + +and pp_statement buffer ~pad:(pd,pc as pad) = function + Instr instr -> + let node = sprintf "%sInstr\n" pd in + Buffer.add_string buffer node; + pp_instruction buffer ~pad:(mk_pad 1 0 pc) instr +| Data data_decl -> + let node = sprintf "%sData\n" pd in + Buffer.add_string buffer node; + pp_data_decl buffer ~pad data_decl + +and pp_instruction buffer ~pad:(pd,pc as pad) = function + Single single_instr -> + let node = sprintf "%sSingle\n" pd in + Buffer.add_string buffer node; + pp_single_instr buffer ~pad:(mk_pad 1 0 pc) single_instr +| Block {value; _} -> + let node = sprintf "%sBlock\n" pd in + Buffer.add_string buffer node; + pp_statements buffer ~pad value.statements + +and pp_single_instr buffer ~pad:(pd,pc as pad) = function + Cond {value; _} -> + let node = sprintf "%sCond\n" pd in + Buffer.add_string buffer node; + pp_conditional buffer ~pad value +| CaseInstr {value; _} -> + let node = sprintf "%sCaseInstr\n" pd in + Buffer.add_string buffer node; + pp_case pp_instruction buffer ~pad value +| Assign {value; _} -> + let node = sprintf "%sAssign\n" pd in + Buffer.add_string buffer node; + pp_assignment buffer ~pad value +| Loop loop -> + let node = sprintf "%sLoop\n" pd in + Buffer.add_string buffer node; + pp_loop buffer ~pad:(mk_pad 1 0 pc) loop +| ProcCall call -> + let node = sprintf "%sProcCall\n" pd in + Buffer.add_string buffer node; + pp_fun_call buffer ~pad:(mk_pad 1 0 pc) call +| Skip _ -> + let node = sprintf "%sSkip\n" pd in + Buffer.add_string buffer node +| RecordPatch {value; _} -> + let node = sprintf "%sRecordPatch\n" pd in + Buffer.add_string buffer node; + pp_record_patch buffer ~pad:(mk_pad 1 0 pc) value +| MapPatch {value; _} -> + let node = sprintf "%sMapPatch\n" pd in + Buffer.add_string buffer node; + pp_map_patch buffer ~pad:(mk_pad 1 0 pc) value +| SetPatch {value; _} -> + let node = sprintf "%SetPatch\n" pd in + Buffer.add_string buffer node; + pp_set_patch buffer ~pad:(mk_pad 1 0 pc) value +| MapRemove {value; _} -> + let node = sprintf "%sMapRemove\n" pd in + Buffer.add_string buffer node; + pp_map_remove buffer ~pad:(mk_pad 1 0 pc) value +| SetRemove {value; _} -> + let node = sprintf "%sSetRemove\n" pd in + Buffer.add_string buffer node; + pp_set_remove buffer ~pad:(mk_pad 1 0 pc) value + +and pp_conditional buffer ~pad:(_,pc) cond = + let () = + let pd, pc = mk_pad 3 0 pc in + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_expr buffer ~pad:(mk_pad 1 0 pc) cond.test in + let () = + let pd, pc = mk_pad 3 1 pc in + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_if_clause buffer ~pad:(mk_pad 1 0 pc) cond.ifso in + let () = + let pd, pc = mk_pad 3 2 pc in + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + 
pp_if_clause buffer ~pad:(mk_pad 2 1 pc) cond.ifnot + in () + +and pp_if_clause buffer ~pad:(pd,pc) = function + ClauseInstr instr -> + let node = sprintf "%sClauseInstr\n" pd in + Buffer.add_string buffer node; + pp_instruction buffer ~pad:(mk_pad 1 0 pc) instr +| ClauseBlock {value; _} -> + let node = sprintf "%sClauseBlock\n" pd in + let statements, _ = value.inside in + Buffer.add_string buffer node; + pp_statements buffer ~pad:(mk_pad 1 0 pc) statements + +and pp_case printer buffer ~pad:(_,pc) case = + let clauses = Utils.nsepseq_to_list case.cases.value in + let length = List.length clauses in + let apply len rank = + pp_case_clause printer buffer ~pad:(mk_pad len rank pc) + in pp_expr buffer ~pad:(mk_pad length 0 pc) case.expr; + List.iteri (apply length) clauses + +and pp_case_clause printer buffer ~pad:(pd,pc) {value; _} = + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_pattern buffer ~pad:(mk_pad 2 0 pc) value.pattern; + printer buffer ~pad:(mk_pad 2 1 pc) value.rhs + +and pp_pattern buffer ~pad:(pd,pc as pad) = function + PNone _ -> + let node = sprintf "%sPNone\n" pd in + Buffer.add_string buffer node +| PSome {value=_,{value=par; _}; _} -> + let node = sprintf "%sPSome\n" pd in + Buffer.add_string buffer node; + pp_pattern buffer ~pad:(mk_pad 1 0 pc) par.inside +| PWild _ -> + let node = sprintf "%sPWild\n" pd + in Buffer.add_string buffer node +| PConstr {value; _} -> + let node = sprintf "%sPConstr\n" pd in + Buffer.add_string buffer node; + pp_constr buffer ~pad:(mk_pad 1 0 pc) value +| PCons {value; _} -> + let node = sprintf "%sPCons\n" pd in + let patterns = Utils.nsepseq_to_list value in + let length = List.length patterns in + let apply len rank = + pp_pattern buffer ~pad:(mk_pad len rank pc) in + Buffer.add_string buffer node; + List.iteri (apply length) patterns +| PVar {value; _} -> + let node = sprintf "%sPVar\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 1 0 pc) value +| PInt {value; _} -> + let node = sprintf "%sPInt\n" pd in + Buffer.add_string buffer node; + pp_int buffer ~pad value +| PBytes {value; _} -> + let node = sprintf "%sPBytes\n" pd in + Buffer.add_string buffer node; + pp_bytes buffer ~pad value +| PString {value; _} -> + let node = sprintf "%sPString\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 1 0 pc) value +| PUnit _ -> + let node = sprintf "%sPUnit\n" pd in + Buffer.add_string buffer node +| PFalse _ -> + let node = sprintf "%sPFalse\n" pd in + Buffer.add_string buffer node +| PTrue _ -> + let node = sprintf "%sPTrue\n" pd in + Buffer.add_string buffer node +| PList plist -> + let node = sprintf "%sPList\n" pd in + Buffer.add_string buffer node; + pp_plist buffer ~pad:(mk_pad 1 0 pc) plist +| PTuple {value; _} -> + let node = sprintf "%sPTuple\n" pd in + Buffer.add_string buffer node; + pp_tuple_pattern buffer ~pad:(mk_pad 1 0 pc) value + +and pp_bytes buffer ~pad:(_,pc) (lexeme, hex) = + pp_string buffer ~pad:(mk_pad 2 0 pc) lexeme; + pp_string buffer ~pad:(mk_pad 2 1 pc) (Hex.to_string hex) + +and pp_int buffer ~pad:(_,pc) (lexeme, z) = + pp_string buffer ~pad:(mk_pad 2 0 pc) lexeme; + pp_string buffer ~pad:(mk_pad 2 1 pc) (Z.to_string z) + +and pp_constr buffer ~pad = function + {value; _}, None -> + pp_ident buffer ~pad value +| {value=id; _}, Some {value=ptuple; _} -> + pp_ident buffer ~pad id; + pp_tuple_pattern buffer ~pad ptuple + +and pp_plist buffer ~pad:(pd,pc) = function + Sugar {value; _} -> + let node = sprintf "%sSugar\n" pd in + Buffer.add_string 
buffer node; + pp_injection pp_pattern buffer ~pad:(mk_pad 1 0 pc) value +| PNil _ -> + let node = sprintf "%sPNil\n" pd in + Buffer.add_string buffer node +| Raw {value; _} -> + let node = sprintf "%sRaw\n" pd in + Buffer.add_string buffer node; + pp_raw buffer ~pad:(mk_pad 1 0 pc) value.inside + +and pp_raw buffer ~pad:(_,pc) (head, _, tail) = + pp_pattern buffer ~pad:(mk_pad 2 0 pc) head; + pp_pattern buffer ~pad:(mk_pad 2 1 pc) tail + +and pp_injection printer buffer ~pad:(_,pc) inj = + let elements = Utils.sepseq_to_list inj.elements in + let length = List.length elements in + let apply len rank = + printer buffer ~pad:(mk_pad len rank pc) + in List.iteri (apply length) elements + +and pp_tuple_pattern buffer ~pad:(_,pc) tuple = + let patterns = Utils.nsepseq_to_list tuple.inside in + let length = List.length patterns in + let apply len rank = + pp_pattern buffer ~pad:(mk_pad len rank pc) + in List.iteri (apply length) patterns + +and pp_assignment buffer ~pad:(_,pc) asgn = + pp_lhs buffer ~pad:(mk_pad 2 0 pc) asgn.lhs; + pp_rhs buffer ~pad:(mk_pad 2 1 pc) asgn.rhs + +and pp_rhs buffer ~pad:(pd,pc) rhs = + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + pp_expr buffer ~pad:(mk_pad 1 0 pc) rhs + +and pp_lhs buffer ~pad:(pd,pc) lhs = + let node = sprintf "%s\n" pd in + Buffer.add_string buffer node; + let pd, pc as pad = mk_pad 1 0 pc in + match lhs with + Path path -> + let node = sprintf "%sPath\n" pd in + Buffer.add_string buffer node; + pp_path buffer ~pad:(mk_pad 1 0 pc) path + | MapPath {value; _} -> + let node = sprintf "%sMapPath\n" pd in + Buffer.add_string buffer node; + pp_map_lookup buffer ~pad value + +and pp_path buffer ~pad:(pd,pc as pad) = function + Name {value; _} -> + let node = sprintf "%sName\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 1 0 pc) value +| Path {value; _} -> + let node = sprintf "%sPath\n" pd in + Buffer.add_string buffer node; + pp_projection buffer ~pad value + +and pp_projection buffer ~pad:(_,pc) proj = + let selections = Utils.nsepseq_to_list proj.field_path in + let len = List.length selections in + let apply len rank = + pp_selection buffer ~pad:(mk_pad len rank pc) in + pp_ident buffer ~pad:(mk_pad (1+len) 0 pc) proj.struct_name.value; + List.iteri (apply len) selections + +and pp_selection buffer ~pad:(pd,pc as pad) = function + FieldName {value; _} -> + let node = sprintf "%sFieldName\n" pd in + Buffer.add_string buffer node; + pp_ident buffer ~pad:(mk_pad 1 0 pc) value +| Component {value; _} -> + let node = sprintf "%sComponent\n" pd in + Buffer.add_string buffer node; + pp_int buffer ~pad value + +and pp_map_lookup buffer ~pad:(_,pc) lookup = + pp_path buffer ~pad:(mk_pad 2 0 pc) lookup.path; + pp_expr buffer ~pad:(mk_pad 2 1 pc) lookup.index.value.inside + +and pp_loop buffer ~pad:(pd,pc) loop = + let node = sprintf "%sPP_LOOP\n" pd in + Buffer.add_string buffer node + +and pp_fun_call buffer ~pad:(pd,pc) call = + let node = sprintf "%sPP_FUN_CALL\n" pd in + Buffer.add_string buffer node + +and pp_record_patch buffer ~pad:(pd,pc) patch = + let node = sprintf "%sPP_RECORD_PATCH\n" pd in + Buffer.add_string buffer node + +and pp_map_patch buffer ~pad:(pd,pc) patch = + let node = sprintf "%sPP_MAP_PATCH\n" pd in + Buffer.add_string buffer node + +and pp_set_patch buffer ~pad:(pd,pc) patch = + let node = sprintf "%sPP_SET_PATCH\n" pd in + Buffer.add_string buffer node + +and pp_map_remove buffer ~pad:(pd,pc) rem = + let node = sprintf "%sPP_MAP_REMOVE\n" pd in + Buffer.add_string buffer node + 
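(* Aside, not part of the patch: the ~pad argument threads a pair (pd, pc)
   through these printers. The first component is printed in front of the
   current node's own line; the second is the margin its children extend.
   With two children, mk_pad 2 0 pc yields the "|-- " branch with a
   "|"-continued margin, and mk_pad 2 1 pc yields the final "`-- " branch
   with a blank margin, which is how pp_ast draws its ASCII tree. *)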
+and pp_set_remove buffer ~pad:(pd,pc) rem = + let node = sprintf "%sPP_SET_REMOVE\n" pd in + Buffer.add_string buffer node + +and pp_local_decls buffer ~pad:(_,pc) decls = + let apply len rank = + pp_local_decl buffer ~pad:(mk_pad len rank pc) + in List.iteri (List.length decls |> apply) decls + +and pp_local_decl buffer ~pad:(pd,pc) = function + LocalFun {value; _} -> + let node = sprintf "%sLocalFun\n" pd in + Buffer.add_string buffer node; + pp_fun_decl buffer ~pad:(mk_pad 1 0 pc) value +| LocalProc {value; _} -> + let node = sprintf "%sLocalProc\n" pd in + Buffer.add_string buffer node; + pp_proc_decl buffer ~pad:(mk_pad 1 0 pc) value +| LocalData data -> + let node = sprintf "%sLocalData\n" pd in + Buffer.add_string buffer node; + pp_data_decl buffer ~pad:(mk_pad 1 0 pc) data + +and pp_data_decl buffer ~pad = function + LocalConst {value; _} -> + let node = sprintf "%sLocalConst\n" (fst pad) in + Buffer.add_string buffer node; + pp_const_decl buffer ~pad value +| LocalVar {value; _} -> + let node = sprintf "%sLocalVar\n" (fst pad) in + Buffer.add_string buffer node; + pp_var_decl buffer ~pad value + +and pp_var_decl buffer ~pad:(_,pc) decl = + pp_ident buffer ~pad:(mk_pad 3 0 pc) decl.name.value; + pp_type_expr buffer ~pad:(mk_pad 3 1 pc) decl.var_type; + pp_expr buffer ~pad:(mk_pad 3 2 pc) decl.init + +and pp_proc_decl buffer ~pad:(pd,pc) decl = + let node = sprintf "%sPP_PROC_DECL\n" pd in + Buffer.add_string buffer node + +and pp_expr buffer ~pad:(pd,pc) decl = + let node = sprintf "%sPP_EXPR\n" pd in + Buffer.add_string buffer node + +let pp_ast buffer = pp_ast buffer ~pad:("","") diff --git a/src/passes/1-parser/pascaligo/ParserLog.mli b/src/passes/1-parser/pascaligo/ParserLog.mli index ad0c3f4f3..bf53dc3e2 100644 --- a/src/passes/1-parser/pascaligo/ParserLog.mli +++ b/src/passes/1-parser/pascaligo/ParserLog.mli @@ -12,3 +12,5 @@ val tokens_to_string : AST.t -> string val path_to_string : AST.path -> string val pattern_to_string : AST.pattern -> string val instruction_to_string : AST.instruction -> string + +val pp_ast : Buffer.t -> AST.t -> unit diff --git a/src/passes/1-parser/pascaligo/ParserMain.ml b/src/passes/1-parser/pascaligo/ParserMain.ml index 70d8a8542..b1f43c0ac 100644 --- a/src/passes/1-parser/pascaligo/ParserMain.ml +++ b/src/passes/1-parser/pascaligo/ParserMain.ml @@ -107,7 +107,8 @@ let () = begin ParserLog.offsets := options.offsets; ParserLog.mode := options.mode; - ParserLog.print_tokens buffer ast; + (* ParserLog.print_tokens buffer ast;*) + ParserLog.pp_ast buffer ast; Buffer.output_buffer stdout buffer end with diff --git a/src/passes/1-parser/shared/Lexer.mli b/src/passes/1-parser/shared/Lexer.mli index 7d4cbb810..8f56ac87e 100644 --- a/src/passes/1-parser/shared/Lexer.mli +++ b/src/passes/1-parser/shared/Lexer.mli @@ -60,22 +60,22 @@ module type TOKEN = (* Errors *) - type int_err = Non_canonical_zero - type ident_err = Reserved_name - type invalid_natural = - | Invalid_natural - | Non_canonical_zero_nat + type int_err = Non_canonical_zero + type ident_err = Reserved_name + type nat_err = Invalid_natural + | Non_canonical_zero_nat + type sym_err = Invalid_symbol (* Injections *) - val mk_string : lexeme -> Region.t -> token - val mk_bytes : lexeme -> Region.t -> token val mk_int : lexeme -> Region.t -> (token, int_err) result - val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result + val mk_nat : lexeme -> Region.t -> (token, nat_err) result val mk_mtz : lexeme -> Region.t -> (token, int_err) result val mk_ident : lexeme -> Region.t -> (token, 
ident_err) result + val mk_sym : lexeme -> Region.t -> (token, sym_err) result + val mk_string : lexeme -> Region.t -> token + val mk_bytes : lexeme -> Region.t -> token val mk_constr : lexeme -> Region.t -> token - val mk_sym : lexeme -> Region.t -> token val eof : Region.t -> token (* Predicates *) diff --git a/src/passes/1-parser/shared/Lexer.mll b/src/passes/1-parser/shared/Lexer.mll index f2172595f..012d8b6b6 100644 --- a/src/passes/1-parser/shared/Lexer.mll +++ b/src/passes/1-parser/shared/Lexer.mll @@ -101,22 +101,22 @@ module type TOKEN = (* Errors *) - type int_err = Non_canonical_zero - type ident_err = Reserved_name - type invalid_natural = - | Invalid_natural - | Non_canonical_zero_nat + type int_err = Non_canonical_zero + type ident_err = Reserved_name + type nat_err = Invalid_natural + | Non_canonical_zero_nat + type sym_err = Invalid_symbol (* Injections *) - val mk_string : lexeme -> Region.t -> token - val mk_bytes : lexeme -> Region.t -> token val mk_int : lexeme -> Region.t -> (token, int_err) result - val mk_nat : lexeme -> Region.t -> (token, invalid_natural) result + val mk_nat : lexeme -> Region.t -> (token, nat_err) result val mk_mtz : lexeme -> Region.t -> (token, int_err) result val mk_ident : lexeme -> Region.t -> (token, ident_err) result + val mk_sym : lexeme -> Region.t -> (token, sym_err) result + val mk_string : lexeme -> Region.t -> token + val mk_bytes : lexeme -> Region.t -> token val mk_constr : lexeme -> Region.t -> token - val mk_sym : lexeme -> Region.t -> token val eof : Region.t -> token (* Predicates *) @@ -343,6 +343,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) = type Error.t += Broken_string type Error.t += Invalid_character_in_string type Error.t += Reserved_name + type Error.t += Invalid_symbol type Error.t += Invalid_natural let error_to_string = function @@ -386,6 +387,9 @@ module Make (Token: TOKEN) : (S with module Token = Token) = | Reserved_name -> "Reserved named.\n\ Hint: Change the name.\n" + | Invalid_symbol -> + "Invalid symbol.\n\ + Hint: Check the LIGO syntax you use.\n" | Invalid_natural -> "Invalid natural." | _ -> assert false @@ -487,8 +491,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) = in Token.mk_constr lexeme region, state let mk_sym state buffer = - let region, lexeme, state = sync state buffer - in Token.mk_sym lexeme region, state + let region, lexeme, state = sync state buffer in + match Token.mk_sym lexeme region with + Ok token -> token, state + | Error Token.Invalid_symbol -> fail region Invalid_symbol let mk_eof state buffer = let region, _, state = sync state buffer @@ -518,12 +524,17 @@ let byte_seq = byte | byte (byte | '_')* byte let bytes = "0x" (byte_seq? as seq) let esc = "\\n" | "\\\"" | "\\\\" | "\\b" | "\\r" | "\\t" | "\\x" byte -let symbol = ';' | ',' | '(' | ')'| '[' | ']' | '{' | '}' - | '#' | '|' | "->" | ":=" | '=' | ':' - | '<' | "<=" | '>' | ">=" | "=/=" | "<>" - | '+' | '-' | '*' | '/' | '.' | '_' | '^' - | "::" | "||" | "&&" -let string = [^'"' '\\' '\n']* (* For strings of #include *) +let pascaligo_sym = "=/=" | '#' | ":=" +let cameligo_sym = "<>" | "::" | "||" | "&&" + +let symbol = + ';' | ',' | '(' | ')'| '[' | ']' | '{' | '}' +| '=' | ':' | '|' | "->" | '.' | '_' | '^' +| '+' | '-' | '*' | '/' +| '<' | "<=" | '>' | ">=" +| pascaligo_sym | cameligo_sym + +let string = [^'"' '\\' '\n']* (* For strings of #include *) (* RULES *)
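As a side note, here is a small usage sketch, not part of the patch, of the new
result-returning interface. The helper check_sym and the sample lexeme are
invented for illustration; LexToken.mk_sym, sym_err and Invalid_symbol are the
ones introduced above (PascaLIGO flavour shown), and the shared lexer consumes
them the same way before calling fail.

  (* Hypothetical helper: turn the result of the new LexToken.mk_sym into
     either a token or nothing, mirroring what the shared lexer now does. *)
  let check_sym (lexeme : string) (region : Region.t) =
    match LexToken.mk_sym lexeme region with
      Ok token -> Some token
    | Error LexToken.Invalid_symbol ->
        (* E.g. "<>" handed to the PascaLIGO LexToken: the shared lexer
           scans it, but it is only valid in Ligodity, so it is now
           reported gracefully instead of invalidating an assertion. *)
        None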