Additions and refactoring so the AST gets even closer to that of PAscaligo.

* Added type annotations for expressions.
* Added bytes.
* Changed the AST for function calls
* Changed the AST for constructor applications
This commit is contained in:
Christian Rinderknecht 2019-05-14 15:56:08 +02:00 committed by Georges Dupéron
parent 011ae44b54
commit 7de4a1802a
6 changed files with 85 additions and 61 deletions

View File

@ -99,14 +99,6 @@ type 'a par = {
type the_unit = lpar * rpar type the_unit = lpar * rpar
(* Brackets compounds *)
type 'a brackets = {
lbracket : lbracket;
inside : 'a;
rbracket : rbracket
}
(* The Abstract Syntax Tree *) (* The Abstract Syntax Tree *)
type t = { type t = {
@ -165,7 +157,7 @@ and field_decl = {
field_type : type_expr field_type : type_expr
} }
and type_tuple = (type_expr, comma) Utils.nsepseq par and type_tuple = (type_expr, comma) Utils.nsepseq par reg
and pattern = and pattern =
PTuple of (pattern, comma) Utils.nsepseq reg PTuple of (pattern, comma) Utils.nsepseq reg
@ -202,24 +194,29 @@ and field_pattern = {
and expr = and expr =
ECase of expr case reg ECase of expr case reg
| EAnnot of annot_expr reg
| ELogic of logic_expr | ELogic of logic_expr
| EArith of arith_expr | EArith of arith_expr
| EString of string_expr | EString of string_expr
| EList of list_expr | EList of list_expr
| EConstr of constr | EConstr of constr_expr reg
| ERecord of record_expr | ERecord of record_expr
| EProj of projection reg | EProj of projection reg
| EVar of variable | EVar of variable
| ECall of (expr * expr) reg | ECall of (expr * expr list) reg
| EBytes of (string * Hex.t) reg
| EUnit of the_unit reg | EUnit of the_unit reg
| ETuple of (expr, comma) Utils.nsepseq reg | ETuple of (expr, comma) Utils.nsepseq reg
| EPar of expr par reg | EPar of expr par reg
| ELetIn of let_in reg | ELetIn of let_in reg
| EFun of fun_expr | EFun of fun_expr
| ECond of conditional reg | ECond of conditional reg
| ESeq of sequence | ESeq of sequence
and constr_expr = constr * expr option
and annot_expr = expr * type_expr
and 'a injection = { and 'a injection = {
opening : opening; opening : opening;
elements : ('a, semi) Utils.sepseq; elements : ('a, semi) Utils.sepseq;
@ -382,10 +379,10 @@ let region_of_expr = function
| EArith e -> region_of_arith_expr e | EArith e -> region_of_arith_expr e
| EString e -> region_of_string_expr e | EString e -> region_of_string_expr e
| EList e -> region_of_list_expr e | EList e -> region_of_list_expr e
| ELetIn {region;_} | EFun {region;_} | EAnnot {region;_ } | ELetIn {region;_} | EFun {region;_}
| ECond {region;_} | ETuple {region;_} | ECase {region;_} | ECond {region;_} | ETuple {region;_} | ECase {region;_}
| ECall {region;_} | EVar {region; _} | EProj {region; _} | ECall {region;_} | EVar {region; _} | EProj {region; _}
| EUnit {region;_} | EPar {region;_} | EUnit {region;_} | EPar {region;_} | EBytes {region; _}
| ESeq {region; _} | ERecord {region; _} | ESeq {region; _} | ERecord {region; _}
| EConstr {region; _} -> region | EConstr {region; _} -> region
@ -480,6 +477,10 @@ let print_uident Region.{region; value} =
let print_str Region.{region; value} = let print_str Region.{region; value} =
Printf.printf "%s: Str \"%s\"\n" (region#compact `Byte) value Printf.printf "%s: Str \"%s\"\n" (region#compact `Byte) value
let print_bytes Region.{region; value=lexeme, abstract} =
Printf.printf "%s: Bytes (\"%s\", \"0x%s\")\n"
(region#compact `Byte) lexeme (Hex.to_string abstract)
let rec print_tokens ?(undo=false) {decl;eof} = let rec print_tokens ?(undo=false) {decl;eof} =
Utils.nseq_iter (print_statement undo) decl; print_token eof "EOF" Utils.nseq_iter (print_statement undo) decl; print_token eof "EOF"
@ -516,7 +517,8 @@ and print_type_app {value; _} =
print_type_tuple type_tuple; print_type_tuple type_tuple;
print_var type_constr print_var type_constr
and print_type_tuple {lpar; inside; rpar} = and print_type_tuple {value; _} =
let {lpar; inside; rpar} = value in
print_token lpar "("; print_token lpar "(";
print_nsepseq "," print_type_expr inside; print_nsepseq "," print_type_expr inside;
print_token rpar ")" print_token rpar ")"
@ -672,21 +674,31 @@ and print_expr undo = function
print_expr undo expr print_expr undo expr
else print_fun_expr undo f else print_fun_expr undo f
| EAnnot e -> print_annot_expr undo e
| ELogic e -> print_logic_expr undo e | ELogic e -> print_logic_expr undo e
| EArith e -> print_arith_expr undo e | EArith e -> print_arith_expr undo e
| EString e -> print_string_expr undo e | EString e -> print_string_expr undo e
| ECall {value=e1,e2; _} -> print_expr undo e1; print_expr undo e2 | ECall {value=f,l; _} ->
print_expr undo f; List.iter (print_expr undo) l
| EVar v -> print_var v | EVar v -> print_var v
| EProj p -> print_projection p | EProj p -> print_projection p
| EUnit {value=lpar,rpar; _} -> | EUnit {value=lpar,rpar; _} ->
print_token lpar "("; print_token rpar ")" print_token lpar "("; print_token rpar ")"
| EBytes b -> print_bytes b
| EPar {value={lpar;inside=e;rpar}; _} -> | EPar {value={lpar;inside=e;rpar}; _} ->
print_token lpar "("; print_expr undo e; print_token rpar ")" print_token lpar "("; print_expr undo e; print_token rpar ")"
| EList e -> print_list_expr undo e | EList e -> print_list_expr undo e
| ESeq seq -> print_sequence undo seq | ESeq seq -> print_sequence undo seq
| ERecord e -> print_record_expr undo e | ERecord e -> print_record_expr undo e
| EConstr constr -> print_uident constr | EConstr {value=constr,None; _} -> print_uident constr
| EConstr {value=(constr, Some arg); _} ->
print_uident constr; print_expr undo arg
and print_annot_expr undo {value=e,t; _} =
print_expr undo e;
print_token Region.ghost ":";
print_type_expr t
and print_list_expr undo = function and print_list_expr undo = function
Cons {value={arg1;op;arg2}; _} -> Cons {value={arg1;op;arg2}; _} ->

View File

@ -106,14 +106,6 @@ type 'a par = {
type the_unit = lpar * rpar type the_unit = lpar * rpar
(* Brackets compounds *)
type 'a brackets = {
lbracket : lbracket;
inside : 'a;
rbracket : rbracket
}
(* The Abstract Syntax Tree (finally) *) (* The Abstract Syntax Tree (finally) *)
type t = { type t = {
@ -175,7 +167,7 @@ and field_decl = {
field_type : type_expr field_type : type_expr
} }
and type_tuple = (type_expr, comma) Utils.nsepseq par and type_tuple = (type_expr, comma) Utils.nsepseq par reg
and pattern = and pattern =
PTuple of (pattern, comma) Utils.nsepseq reg (* p1, p2, ... *) PTuple of (pattern, comma) Utils.nsepseq reg (* p1, p2, ... *)
@ -212,24 +204,29 @@ and field_pattern = {
and expr = and expr =
ECase of expr case reg (* p1 -> e1 | p2 -> e2 | ... *) ECase of expr case reg (* p1 -> e1 | p2 -> e2 | ... *)
| EAnnot of annot_expr reg (* e : t *)
| ELogic of logic_expr | ELogic of logic_expr
| EArith of arith_expr | EArith of arith_expr
| EString of string_expr | EString of string_expr
| EList of list_expr | EList of list_expr
| EConstr of constr | EConstr of constr_expr reg
| ERecord of record_expr (* {f1=e1; ... } *) | ERecord of record_expr (* {f1=e1; ... } *)
| EProj of projection reg (* x.y.z M.x.y *) | EProj of projection reg (* x.y.z M.x.y *)
| EVar of variable (* x *) | EVar of variable (* x *)
| ECall of (expr * expr) reg (* f e *) | ECall of (expr * expr list) reg (* e e1 ... en *)
| EBytes of (string * Hex.t) reg (* 0xAEFF *)
| EUnit of the_unit reg (* () *) | EUnit of the_unit reg (* () *)
| ETuple of (expr, comma) Utils.nsepseq reg (* e1, e2, ... *) | ETuple of (expr, comma) Utils.nsepseq reg (* e1, e2, ... *)
| EPar of expr par reg (* (e) *) | EPar of expr par reg (* (e) *)
| ELetIn of let_in reg (* let p1 = e1 and p2 = e2 and ... in e *) | ELetIn of let_in reg (* let p1 = e1 and p2 = e2 and ... in e *)
| EFun of fun_expr (* fun x -> e *) | EFun of fun_expr (* fun x -> e *)
| ECond of conditional reg (* if e1 then e2 else e3 *) | ECond of conditional reg (* if e1 then e2 else e3 *)
| ESeq of sequence (* begin e1; e2; ... ; en end *) | ESeq of sequence (* begin e1; e2; ... ; en end *)
and constr_expr = constr * expr option
and annot_expr = expr * type_expr
and 'a injection = { and 'a injection = {
opening : opening; opening : opening;
elements : ('a, semi) Utils.sepseq; elements : ('a, semi) Utils.sepseq;

View File

@ -202,6 +202,8 @@ let tparam = "'" ident (* Type parameters. Unused yet *)
let hexa = digit | ['A'-'F'] let hexa = digit | ['A'-'F']
let byte = hexa hexa let byte = hexa hexa
let byte_seq = byte | byte (byte | '_')* byte
let bytes = "0x" (byte_seq? as seq)
let esc = "\\n" | "\\\\" | "\\b" | "\\r" | "\\t" let esc = "\\n" | "\\\\" | "\\b" | "\\r" | "\\t"
let schar = [^'"' '\\'] # nl (* TODO: Test *) let schar = [^'"' '\\'] # nl (* TODO: Test *)
@ -259,9 +261,13 @@ rule scan = parse
| decimal as tz "tz" { | decimal as tz "tz" {
match format_tz tz with match format_tz tz with
Some z -> Token.Mtz (tz ^ "tz", z) Some z -> Token.Mtz (tz ^ "tz", z)
| None -> sprintf "Invalid tez amount." |> error lexbuf } | None -> sprintf "Invalid tez amount." |> error lexbuf
}
| uident as id { Token.Constr id } | uident as id { Token.Constr id }
| bytes {
let norm = Str.(global_replace (regexp "_") "" seq)
in Token.Bytes (seq, Hex.of_string norm)
}
| "let%init" { Token.Let } | "let%init" { Token.Let }
| "let%entry" { Token.LetEntry } | "let%entry" { Token.LetEntry }
| "match%nat" { Token.MatchNat } | "match%nat" { Token.MatchNat }

View File

@ -81,10 +81,7 @@ sep_or_term_list(item,sep):
(* Compound constructs *) (* Compound constructs *)
par(X): lpar X rpar { {lpar=$1; inside=$2; rpar=$3} } par(X): reg(lpar X rpar { {lpar=$1; inside=$2; rpar=$3} }) { $1 }
brackets(X): lbracket X rbracket {
{lbracket=$1; inside=$2; rbracket=$3} }
(* Sequences (* Sequences
@ -181,17 +178,19 @@ core_type:
type_projection { type_projection {
TAlias $1 TAlias $1
} }
| reg(core_type type_constr {$1,$2}) { | reg(reg(core_type) type_constr {$1,$2}) {
let arg, constr = $1.value in let arg, constr = $1.value in
let Region.{value=arg_val; _} = arg in
let lpar, rpar = Region.ghost, Region.ghost in let lpar, rpar = Region.ghost, Region.ghost in
let arg = {lpar; inside=arg,[]; rpar} in let arg_val = {lpar; inside=arg_val,[]; rpar} in
let arg = {arg with value=arg_val} in
TApp Region.{$1 with value = constr, arg} TApp Region.{$1 with value = constr, arg}
} }
| reg(type_tuple type_constr {$1,$2}) { | reg(type_tuple type_constr {$1,$2}) {
let arg, constr = $1.value in let arg, constr = $1.value in
TApp Region.{$1 with value = constr, arg} TApp Region.{$1 with value = constr, arg}
} }
| reg(par(cartesian)) { | par(cartesian) {
let Region.{region; value={lpar; inside=prod; rpar}} = $1 in let Region.{region; value={lpar; inside=prod; rpar}} = $1 in
TPar Region.{region; value={lpar; inside = TProd prod; rpar}} } TPar Region.{region; value={lpar; inside = TProd prod; rpar}} }
@ -259,7 +258,7 @@ sub_irrefutable:
ident { PVar $1 } ident { PVar $1 }
| wild { PWild $1 } | wild { PWild $1 }
| unit { PUnit $1 } | unit { PUnit $1 }
| reg(par(closed_irrefutable)) { PPar $1 } | par(closed_irrefutable) { PPar $1 }
closed_irrefutable: closed_irrefutable:
reg(tuple(sub_irrefutable)) { PTuple $1 } reg(tuple(sub_irrefutable)) { PTuple $1 }
@ -276,7 +275,7 @@ pattern:
| core_pattern { $1 } | core_pattern { $1 }
sub_pattern: sub_pattern:
reg(par(tail)) { PPar $1 } par(tail) { PPar $1 }
| core_pattern { $1 } | core_pattern { $1 }
core_pattern: core_pattern:
@ -287,7 +286,7 @@ core_pattern:
| kwd(True) { PTrue $1 } | kwd(True) { PTrue $1 }
| kwd(False) { PFalse $1 } | kwd(False) { PFalse $1 }
| string { PString $1 } | string { PString $1 }
| reg(par(ptuple)) { PPar $1 } | par(ptuple) { PPar $1 }
| reg(list_of(tail)) { PList (Sugar $1) } | reg(list_of(tail)) { PList (Sugar $1) }
| reg(constr_pattern) { PConstr $1 } | reg(constr_pattern) { PConstr $1 }
| reg(record_pattern) { PRecord $1 } | reg(record_pattern) { PRecord $1 }
@ -376,7 +375,7 @@ match_expr(right_expr):
let open Region in let open Region in
let cases = Utils.nsepseq_rev $5.value in let cases = Utils.nsepseq_rev $5.value in
let cast = EVar {region=ghost; value="assert_pos"} in let cast = EVar {region=ghost; value="assert_pos"} in
let cast = ECall {region=ghost; value=cast,$2} in let cast = ECall {region=ghost; value=cast,[$2]} in
{kwd_match = $1; expr = cast; opening = With $3; {kwd_match = $1; expr = cast; opening = With $3;
lead_vbar = $4; cases = {$5 with value=cases}; lead_vbar = $4; cases = {$5 with value=cases};
closing = End Region.ghost} } closing = End Region.ghost} }
@ -496,17 +495,21 @@ unary_expr_level:
| call_expr_level { $1 } | call_expr_level { $1 }
uminus_expr: uminus_expr:
un_op(sym(MINUS), core_expr) { $1 } un_op(sym(MINUS), call_expr_level) { $1 }
not_expr: not_expr:
un_op(kwd(Not), core_expr) { $1 } un_op(kwd(Not), call_expr_level) { $1 }
call_expr_level: call_expr_level:
reg(call_expr) { ECall $1 } reg(call_expr) { ECall $1 }
| reg(constr_expr) { EConstr $1 }
| core_expr { $1 } | core_expr { $1 }
constr_expr:
constr core_expr? { $1,$2 }
call_expr: call_expr:
call_expr_level core_expr { $1,$2 } core_expr core_expr+ { $1,$2 }
core_expr: core_expr:
reg(Int) { EArith (Int $1) } reg(Int) { EArith (Int $1) }
@ -519,10 +522,11 @@ core_expr:
| kwd(False) { ELogic (BoolExpr (False $1)) } | kwd(False) { ELogic (BoolExpr (False $1)) }
| kwd(True) { ELogic (BoolExpr (True $1)) } | kwd(True) { ELogic (BoolExpr (True $1)) }
| reg(list_of(expr)) { EList (List $1) } | reg(list_of(expr)) { EList (List $1) }
| reg(par(expr)) { EPar $1 } | par(expr) { EPar $1 }
| constr { EConstr $1 }
| reg(sequence) { ESeq $1 } | reg(sequence) { ESeq $1 }
| reg(record_expr) { ERecord $1 } | reg(record_expr) { ERecord $1 }
| par(expr colon type_expr {$1,$3}) {
EAnnot {$1 with value=$1.value.inside} }
module_field: module_field:
module_name dot field_name { $1.value ^ "." ^ $3.value } module_name dot field_name { $1.value ^ "." ^ $3.value }
@ -541,7 +545,7 @@ projection:
selection: selection:
field_name { FieldName $1 } field_name { FieldName $1 }
| reg(par(reg(Int))) { Component $1 } | par(reg(Int)) { Component $1 }
record_expr: record_expr:
lbrace sep_or_term_list(reg(field_assignment),semi) rbrace { lbrace sep_or_term_list(reg(field_assignment),semi) rbrace {

View File

@ -40,6 +40,7 @@ type t =
| Nat of (string * Z.t) | Nat of (string * Z.t)
| Mtz of (string * Z.t) | Mtz of (string * Z.t)
| Str of string | Str of string
| Bytes of (string * Hex.t)
(* Keywords *) (* Keywords *)
@ -80,6 +81,8 @@ type t =
type token = t type token = t
let sprintf = Printf.sprintf
let to_string = function let to_string = function
ARROW -> "->" ARROW -> "->"
| CONS -> "::" | CONS -> "::"
@ -109,12 +112,13 @@ let to_string = function
| GE -> ">=" | GE -> ">="
| BOOL_OR -> "||" | BOOL_OR -> "||"
| BOOL_AND -> "&&" | BOOL_AND -> "&&"
| Ident id -> Printf.sprintf "Ident %s" id | Ident id -> sprintf "Ident %s" id
| Constr id -> Printf.sprintf "Constr %s" id | Constr id -> sprintf "Constr %s" id
| Int (lex,z) -> Printf.sprintf "Int %s (%s)" lex (Z.to_string z) | Int (lex,z) -> sprintf "Int %s (%s)" lex (Z.to_string z)
| Nat (lex,z) -> Printf.sprintf "Nat %s (%s)" lex (Z.to_string z) | Nat (lex,z) -> sprintf "Nat %s (%s)" lex (Z.to_string z)
| Mtz (lex,z) -> Printf.sprintf "Mtz %s (%s)" lex (Z.to_string z) | Mtz (lex,z) -> sprintf "Mtz %s (%s)" lex (Z.to_string z)
| Str n -> Printf.sprintf "Str \"%s\"" n | Str n -> sprintf "Str \"%s\"" n
| Bytes (lex,h) -> sprintf "Bytes %s (0x%s)" lex (Hex.to_string h)
| And -> "and" | And -> "and"
| Begin -> "begin" | Begin -> "begin"
| Else -> "else" | Else -> "else"

View File

@ -56,6 +56,7 @@ type t =
| Nat of (string * Z.t) | Nat of (string * Z.t)
| Mtz of (string * Z.t) | Mtz of (string * Z.t)
| Str of string | Str of string
| Bytes of (string * Hex.t)
(* Keywords *) (* Keywords *)