From 7de4a1802a4195282ffa88a29424d50a9c85ccde Mon Sep 17 00:00:00 2001 From: Christian Rinderknecht Date: Tue, 14 May 2019 15:56:08 +0200 Subject: [PATCH] Additions and refactoring so the AST gets even closer to that of PAscaligo. * Added type annotations for expressions. * Added bytes. * Changed the AST for function calls * Changed the AST for constructor applications --- src/parser/ligodity/AST.ml | 46 +++++++++++++++++++++------------- src/parser/ligodity/AST.mli | 27 +++++++++----------- src/parser/ligodity/Lexer.mll | 12 ++++++--- src/parser/ligodity/Parser.mly | 44 +++++++++++++++++--------------- src/parser/ligodity/Token.ml | 16 +++++++----- src/parser/ligodity/Token.mli | 1 + 6 files changed, 85 insertions(+), 61 deletions(-) diff --git a/src/parser/ligodity/AST.ml b/src/parser/ligodity/AST.ml index bd9cbd57b..97ab55161 100644 --- a/src/parser/ligodity/AST.ml +++ b/src/parser/ligodity/AST.ml @@ -99,14 +99,6 @@ type 'a par = { type the_unit = lpar * rpar -(* Brackets compounds *) - -type 'a brackets = { - lbracket : lbracket; - inside : 'a; - rbracket : rbracket -} - (* The Abstract Syntax Tree *) type t = { @@ -165,7 +157,7 @@ and field_decl = { field_type : type_expr } -and type_tuple = (type_expr, comma) Utils.nsepseq par +and type_tuple = (type_expr, comma) Utils.nsepseq par reg and pattern = PTuple of (pattern, comma) Utils.nsepseq reg @@ -202,24 +194,29 @@ and field_pattern = { and expr = ECase of expr case reg +| EAnnot of annot_expr reg | ELogic of logic_expr | EArith of arith_expr | EString of string_expr | EList of list_expr -| EConstr of constr +| EConstr of constr_expr reg | ERecord of record_expr | EProj of projection reg | EVar of variable -| ECall of (expr * expr) reg +| ECall of (expr * expr list) reg +| EBytes of (string * Hex.t) reg | EUnit of the_unit reg | ETuple of (expr, comma) Utils.nsepseq reg | EPar of expr par reg - | ELetIn of let_in reg | EFun of fun_expr | ECond of conditional reg | ESeq of sequence +and constr_expr = constr * expr option + +and annot_expr = expr * type_expr + and 'a injection = { opening : opening; elements : ('a, semi) Utils.sepseq; @@ -382,10 +379,10 @@ let region_of_expr = function | EArith e -> region_of_arith_expr e | EString e -> region_of_string_expr e | EList e -> region_of_list_expr e -| ELetIn {region;_} | EFun {region;_} +| EAnnot {region;_ } | ELetIn {region;_} | EFun {region;_} | ECond {region;_} | ETuple {region;_} | ECase {region;_} | ECall {region;_} | EVar {region; _} | EProj {region; _} -| EUnit {region;_} | EPar {region;_} +| EUnit {region;_} | EPar {region;_} | EBytes {region; _} | ESeq {region; _} | ERecord {region; _} | EConstr {region; _} -> region @@ -480,6 +477,10 @@ let print_uident Region.{region; value} = let print_str Region.{region; value} = Printf.printf "%s: Str \"%s\"\n" (region#compact `Byte) value +let print_bytes Region.{region; value=lexeme, abstract} = + Printf.printf "%s: Bytes (\"%s\", \"0x%s\")\n" + (region#compact `Byte) lexeme (Hex.to_string abstract) + let rec print_tokens ?(undo=false) {decl;eof} = Utils.nseq_iter (print_statement undo) decl; print_token eof "EOF" @@ -516,7 +517,8 @@ and print_type_app {value; _} = print_type_tuple type_tuple; print_var type_constr -and print_type_tuple {lpar; inside; rpar} = +and print_type_tuple {value; _} = + let {lpar; inside; rpar} = value in print_token lpar "("; print_nsepseq "," print_type_expr inside; print_token rpar ")" @@ -672,21 +674,31 @@ and print_expr undo = function print_expr undo expr else print_fun_expr undo f +| EAnnot e -> print_annot_expr undo e | ELogic e -> print_logic_expr undo e | EArith e -> print_arith_expr undo e | EString e -> print_string_expr undo e -| ECall {value=e1,e2; _} -> print_expr undo e1; print_expr undo e2 +| ECall {value=f,l; _} -> + print_expr undo f; List.iter (print_expr undo) l | EVar v -> print_var v | EProj p -> print_projection p | EUnit {value=lpar,rpar; _} -> print_token lpar "("; print_token rpar ")" +| EBytes b -> print_bytes b | EPar {value={lpar;inside=e;rpar}; _} -> print_token lpar "("; print_expr undo e; print_token rpar ")" | EList e -> print_list_expr undo e | ESeq seq -> print_sequence undo seq | ERecord e -> print_record_expr undo e -| EConstr constr -> print_uident constr +| EConstr {value=constr,None; _} -> print_uident constr +| EConstr {value=(constr, Some arg); _} -> + print_uident constr; print_expr undo arg + +and print_annot_expr undo {value=e,t; _} = + print_expr undo e; + print_token Region.ghost ":"; + print_type_expr t and print_list_expr undo = function Cons {value={arg1;op;arg2}; _} -> diff --git a/src/parser/ligodity/AST.mli b/src/parser/ligodity/AST.mli index 914137179..9f21f64fa 100644 --- a/src/parser/ligodity/AST.mli +++ b/src/parser/ligodity/AST.mli @@ -106,14 +106,6 @@ type 'a par = { type the_unit = lpar * rpar -(* Brackets compounds *) - -type 'a brackets = { - lbracket : lbracket; - inside : 'a; - rbracket : rbracket -} - (* The Abstract Syntax Tree (finally) *) type t = { @@ -175,7 +167,7 @@ and field_decl = { field_type : type_expr } -and type_tuple = (type_expr, comma) Utils.nsepseq par +and type_tuple = (type_expr, comma) Utils.nsepseq par reg and pattern = PTuple of (pattern, comma) Utils.nsepseq reg (* p1, p2, ... *) @@ -212,23 +204,28 @@ and field_pattern = { and expr = ECase of expr case reg (* p1 -> e1 | p2 -> e2 | ... *) +| EAnnot of annot_expr reg (* e : t *) | ELogic of logic_expr | EArith of arith_expr | EString of string_expr | EList of list_expr -| EConstr of constr +| EConstr of constr_expr reg | ERecord of record_expr (* {f1=e1; ... } *) | EProj of projection reg (* x.y.z M.x.y *) | EVar of variable (* x *) -| ECall of (expr * expr) reg (* f e *) +| ECall of (expr * expr list) reg (* e e1 ... en *) +| EBytes of (string * Hex.t) reg (* 0xAEFF *) | EUnit of the_unit reg (* () *) | ETuple of (expr, comma) Utils.nsepseq reg (* e1, e2, ... *) | EPar of expr par reg (* (e) *) +| ELetIn of let_in reg (* let p1 = e1 and p2 = e2 and ... in e *) +| EFun of fun_expr (* fun x -> e *) +| ECond of conditional reg (* if e1 then e2 else e3 *) +| ESeq of sequence (* begin e1; e2; ... ; en end *) -| ELetIn of let_in reg (* let p1 = e1 and p2 = e2 and ... in e *) -| EFun of fun_expr (* fun x -> e *) -| ECond of conditional reg (* if e1 then e2 else e3 *) -| ESeq of sequence (* begin e1; e2; ... ; en end *) +and constr_expr = constr * expr option + +and annot_expr = expr * type_expr and 'a injection = { opening : opening; diff --git a/src/parser/ligodity/Lexer.mll b/src/parser/ligodity/Lexer.mll index 4c123fe5f..dcf38184a 100644 --- a/src/parser/ligodity/Lexer.mll +++ b/src/parser/ligodity/Lexer.mll @@ -202,6 +202,8 @@ let tparam = "'" ident (* Type parameters. Unused yet *) let hexa = digit | ['A'-'F'] let byte = hexa hexa +let byte_seq = byte | byte (byte | '_')* byte +let bytes = "0x" (byte_seq? as seq) let esc = "\\n" | "\\\\" | "\\b" | "\\r" | "\\t" let schar = [^'"' '\\'] # nl (* TODO: Test *) @@ -259,9 +261,13 @@ rule scan = parse | decimal as tz "tz" { match format_tz tz with Some z -> Token.Mtz (tz ^ "tz", z) - | None -> sprintf "Invalid tez amount." |> error lexbuf } - -| uident as id { Token.Constr id } + | None -> sprintf "Invalid tez amount." |> error lexbuf + } +| uident as id { Token.Constr id } +| bytes { + let norm = Str.(global_replace (regexp "_") "" seq) + in Token.Bytes (seq, Hex.of_string norm) + } | "let%init" { Token.Let } | "let%entry" { Token.LetEntry } | "match%nat" { Token.MatchNat } diff --git a/src/parser/ligodity/Parser.mly b/src/parser/ligodity/Parser.mly index 6c8777c4b..fbc323581 100644 --- a/src/parser/ligodity/Parser.mly +++ b/src/parser/ligodity/Parser.mly @@ -81,10 +81,7 @@ sep_or_term_list(item,sep): (* Compound constructs *) -par(X): lpar X rpar { {lpar=$1; inside=$2; rpar=$3} } - -brackets(X): lbracket X rbracket { - {lbracket=$1; inside=$2; rbracket=$3} } +par(X): reg(lpar X rpar { {lpar=$1; inside=$2; rpar=$3} }) { $1 } (* Sequences @@ -181,17 +178,19 @@ core_type: type_projection { TAlias $1 } -| reg(core_type type_constr {$1,$2}) { +| reg(reg(core_type) type_constr {$1,$2}) { let arg, constr = $1.value in + let Region.{value=arg_val; _} = arg in let lpar, rpar = Region.ghost, Region.ghost in - let arg = {lpar; inside=arg,[]; rpar} in + let arg_val = {lpar; inside=arg_val,[]; rpar} in + let arg = {arg with value=arg_val} in TApp Region.{$1 with value = constr, arg} } | reg(type_tuple type_constr {$1,$2}) { let arg, constr = $1.value in TApp Region.{$1 with value = constr, arg} } -| reg(par(cartesian)) { +| par(cartesian) { let Region.{region; value={lpar; inside=prod; rpar}} = $1 in TPar Region.{region; value={lpar; inside = TProd prod; rpar}} } @@ -259,7 +258,7 @@ sub_irrefutable: ident { PVar $1 } | wild { PWild $1 } | unit { PUnit $1 } -| reg(par(closed_irrefutable)) { PPar $1 } +| par(closed_irrefutable) { PPar $1 } closed_irrefutable: reg(tuple(sub_irrefutable)) { PTuple $1 } @@ -276,7 +275,7 @@ pattern: | core_pattern { $1 } sub_pattern: - reg(par(tail)) { PPar $1 } + par(tail) { PPar $1 } | core_pattern { $1 } core_pattern: @@ -287,7 +286,7 @@ core_pattern: | kwd(True) { PTrue $1 } | kwd(False) { PFalse $1 } | string { PString $1 } -| reg(par(ptuple)) { PPar $1 } +| par(ptuple) { PPar $1 } | reg(list_of(tail)) { PList (Sugar $1) } | reg(constr_pattern) { PConstr $1 } | reg(record_pattern) { PRecord $1 } @@ -376,7 +375,7 @@ match_expr(right_expr): let open Region in let cases = Utils.nsepseq_rev $5.value in let cast = EVar {region=ghost; value="assert_pos"} in - let cast = ECall {region=ghost; value=cast,$2} in + let cast = ECall {region=ghost; value=cast,[$2]} in {kwd_match = $1; expr = cast; opening = With $3; lead_vbar = $4; cases = {$5 with value=cases}; closing = End Region.ghost} } @@ -496,17 +495,21 @@ unary_expr_level: | call_expr_level { $1 } uminus_expr: - un_op(sym(MINUS), core_expr) { $1 } + un_op(sym(MINUS), call_expr_level) { $1 } not_expr: - un_op(kwd(Not), core_expr) { $1 } + un_op(kwd(Not), call_expr_level) { $1 } call_expr_level: - reg(call_expr) { ECall $1 } -| core_expr { $1 } + reg(call_expr) { ECall $1 } +| reg(constr_expr) { EConstr $1 } +| core_expr { $1 } + +constr_expr: + constr core_expr? { $1,$2 } call_expr: - call_expr_level core_expr { $1,$2 } + core_expr core_expr+ { $1,$2 } core_expr: reg(Int) { EArith (Int $1) } @@ -519,10 +522,11 @@ core_expr: | kwd(False) { ELogic (BoolExpr (False $1)) } | kwd(True) { ELogic (BoolExpr (True $1)) } | reg(list_of(expr)) { EList (List $1) } -| reg(par(expr)) { EPar $1 } -| constr { EConstr $1 } +| par(expr) { EPar $1 } | reg(sequence) { ESeq $1 } | reg(record_expr) { ERecord $1 } +| par(expr colon type_expr {$1,$3}) { + EAnnot {$1 with value=$1.value.inside} } module_field: module_name dot field_name { $1.value ^ "." ^ $3.value } @@ -540,8 +544,8 @@ projection: {struct_name; selector = $2; field_path = $3} } selection: - field_name { FieldName $1 } -| reg(par(reg(Int))) { Component $1 } + field_name { FieldName $1 } +| par(reg(Int)) { Component $1 } record_expr: lbrace sep_or_term_list(reg(field_assignment),semi) rbrace { diff --git a/src/parser/ligodity/Token.ml b/src/parser/ligodity/Token.ml index b0aa8778a..53337b2e8 100644 --- a/src/parser/ligodity/Token.ml +++ b/src/parser/ligodity/Token.ml @@ -40,6 +40,7 @@ type t = | Nat of (string * Z.t) | Mtz of (string * Z.t) | Str of string +| Bytes of (string * Hex.t) (* Keywords *) @@ -80,6 +81,8 @@ type t = type token = t +let sprintf = Printf.sprintf + let to_string = function ARROW -> "->" | CONS -> "::" @@ -109,12 +112,13 @@ let to_string = function | GE -> ">=" | BOOL_OR -> "||" | BOOL_AND -> "&&" -| Ident id -> Printf.sprintf "Ident %s" id -| Constr id -> Printf.sprintf "Constr %s" id -| Int (lex,z) -> Printf.sprintf "Int %s (%s)" lex (Z.to_string z) -| Nat (lex,z) -> Printf.sprintf "Nat %s (%s)" lex (Z.to_string z) -| Mtz (lex,z) -> Printf.sprintf "Mtz %s (%s)" lex (Z.to_string z) -| Str n -> Printf.sprintf "Str \"%s\"" n +| Ident id -> sprintf "Ident %s" id +| Constr id -> sprintf "Constr %s" id +| Int (lex,z) -> sprintf "Int %s (%s)" lex (Z.to_string z) +| Nat (lex,z) -> sprintf "Nat %s (%s)" lex (Z.to_string z) +| Mtz (lex,z) -> sprintf "Mtz %s (%s)" lex (Z.to_string z) +| Str n -> sprintf "Str \"%s\"" n +| Bytes (lex,h) -> sprintf "Bytes %s (0x%s)" lex (Hex.to_string h) | And -> "and" | Begin -> "begin" | Else -> "else" diff --git a/src/parser/ligodity/Token.mli b/src/parser/ligodity/Token.mli index 7d07943cb..9cabd69f7 100644 --- a/src/parser/ligodity/Token.mli +++ b/src/parser/ligodity/Token.mli @@ -56,6 +56,7 @@ type t = | Nat of (string * Z.t) | Mtz of (string * Z.t) | Str of string +| Bytes of (string * Hex.t) (* Keywords *)