From 6c8b76b3de75185739c9741bde6aab402356bf55 Mon Sep 17 00:00:00 2001 From: Pierre-Emmanuel Wulfman Date: Mon, 27 Apr 2020 11:31:16 +0200 Subject: [PATCH] add verbatim in lexer and parser --- src/passes/1-parser/cameligo/AST.ml | 42 ++++++++-------- src/passes/1-parser/cameligo/LexToken.mli | 40 ++++++++------- src/passes/1-parser/cameligo/LexToken.mll | 25 ++++++---- src/passes/1-parser/cameligo/ParToken.mly | 17 ++++--- src/passes/1-parser/cameligo/Parser.mly | 2 + src/passes/1-parser/cameligo/ParserLog.ml | 17 ++++++- src/passes/1-parser/cameligo/Scoping.ml | 3 +- src/passes/1-parser/pascaligo/AST.ml | 10 ++-- src/passes/1-parser/pascaligo/LexToken.mli | 38 +++++++------- src/passes/1-parser/pascaligo/LexToken.mll | 24 ++++++--- src/passes/1-parser/pascaligo/ParToken.mly | 15 +++--- src/passes/1-parser/pascaligo/Parser.mly | 1 + src/passes/1-parser/pascaligo/ParserLog.ml | 5 ++ src/passes/1-parser/reasonligo/LexToken.mli | 40 ++++++++------- src/passes/1-parser/reasonligo/LexToken.mll | 22 +++++--- src/passes/1-parser/reasonligo/ParToken.mly | 17 ++++--- src/passes/1-parser/reasonligo/Parser.mly | 26 +++++----- src/passes/1-parser/shared/Lexer.mli | 21 ++++---- src/passes/1-parser/shared/Lexer.mll | 50 ++++++++++++++----- src/passes/10-interpreter/interpreter.ml | 2 +- src/passes/10-transpiler/transpiler.ml | 2 +- src/passes/10-transpiler/untranspiler.ml | 2 + .../2-concrete_to_imperative/cameligo.ml | 12 ++--- .../2-concrete_to_imperative/pascaligo.ml | 11 ++-- .../tezos_type_annotation.ml | 18 ++++--- .../9-self_ast_typed/contract_passes.ml | 2 +- src/stages/1-ast_imperative/combinators.ml | 3 +- src/stages/1-ast_imperative/combinators.mli | 1 + src/stages/2-ast_sugar/combinators.mli | 2 +- src/stages/3-ast_core/combinators.mli | 2 +- src/stages/4-ast_typed/PP.ml | 2 +- src/stages/4-ast_typed/PP_generic.ml | 2 + src/stages/4-ast_typed/combinators.mli | 4 +- .../4-ast_typed/combinators_environment.mli | 2 +- src/stages/4-ast_typed/types.ml | 2 +- src/stages/4-ast_typed/types_utils.ml | 1 + src/stages/common/PP.ml | 2 +- src/stages/common/types.ml | 3 +- src/test/contracts/string.ligo | 1 + src/test/typer_tests.ml | 6 +-- 40 files changed, 296 insertions(+), 201 deletions(-) diff --git a/src/passes/1-parser/cameligo/AST.ml b/src/passes/1-parser/cameligo/AST.ml index df94dc783..1282806fb 100644 --- a/src/passes/1-parser/cameligo/AST.ml +++ b/src/passes/1-parser/cameligo/AST.ml @@ -184,21 +184,22 @@ and field_decl = { and type_tuple = (type_expr, comma) nsepseq par reg and pattern = - PConstr of constr_pattern -| PUnit of the_unit reg -| PFalse of kwd_false -| PTrue of kwd_true -| PVar of variable -| PInt of (Lexer.lexeme * Z.t) reg -| PNat of (Lexer.lexeme * Z.t) reg -| PBytes of (Lexer.lexeme * Hex.t) reg -| PString of string reg -| PWild of wild -| PList of list_pattern -| PTuple of (pattern, comma) nsepseq reg -| PPar of pattern par reg -| PRecord of field_pattern reg ne_injection reg -| PTyped of typed_pattern reg + PConstr of constr_pattern +| PUnit of the_unit reg +| PFalse of kwd_false +| PTrue of kwd_true +| PVar of variable +| PInt of (Lexer.lexeme * Z.t) reg +| PNat of (Lexer.lexeme * Z.t) reg +| PBytes of (Lexer.lexeme * Hex.t) reg +| PString of string reg +| PVerbatim of string reg +| PWild of wild +| PList of list_pattern +| PTuple of (pattern, comma) nsepseq reg +| PPar of pattern par reg +| PRecord of field_pattern reg ne_injection reg +| PTyped of typed_pattern reg and constr_pattern = PNone of c_None @@ -266,8 +267,9 @@ and list_expr = (*| Append of (expr * append * expr) reg*) and string_expr = - Cat of cat bin_op reg -| String of string reg + Cat of cat bin_op reg +| String of string reg +| Verbatim of string reg and constr_expr = ENone of c_None @@ -425,8 +427,8 @@ let pattern_to_region = function | PTrue region | PFalse region | PTuple {region;_} | PVar {region;_} | PInt {region;_} -| PString {region;_} | PWild region -| PPar {region;_} +| PString {region;_} | PVerbatim {region;_} +| PWild region | PPar {region;_} | PRecord {region; _} | PTyped {region; _} | PNat {region; _} | PBytes {region; _} -> region @@ -452,7 +454,7 @@ let arith_expr_to_region = function | Nat {region; _} -> region let string_expr_to_region = function - String {region;_} | Cat {region;_} -> region + Verbatim {region;_} | String {region;_} | Cat {region;_} -> region let list_expr_to_region = function ECons {region; _} | EListComp {region; _} diff --git a/src/passes/1-parser/cameligo/LexToken.mli b/src/passes/1-parser/cameligo/LexToken.mli index 4af326823..fabddb2fa 100644 --- a/src/passes/1-parser/cameligo/LexToken.mli +++ b/src/passes/1-parser/cameligo/LexToken.mli @@ -78,14 +78,15 @@ type t = (* Identifiers, labels, numbers and strings *) -| Ident of string Region.reg -| Constr of string Region.reg -| Int of (string * Z.t) Region.reg -| Nat of (string * Z.t) Region.reg -| Mutez of (string * Z.t) Region.reg -| String of string Region.reg -| Bytes of (string * Hex.t) Region.reg -| Attr of string Region.reg +| Ident of string Region.reg +| Constr of string Region.reg +| Int of (string * Z.t) Region.reg +| Nat of (string * Z.t) Region.reg +| Mutez of (string * Z.t) Region.reg +| String of string Region.reg +| Verbatim of string Region.reg +| Bytes of (string * Hex.t) Region.reg +| Attr of string Region.reg (* Keywords *) @@ -142,17 +143,18 @@ type sym_err = Invalid_symbol type attr_err = Invalid_attribute type kwd_err = Invalid_keyword -val mk_int : lexeme -> Region.t -> (token, int_err) result -val mk_nat : lexeme -> Region.t -> (token, nat_err) result -val mk_mutez : lexeme -> Region.t -> (token, int_err) result -val mk_ident : lexeme -> Region.t -> (token, ident_err) result -val mk_sym : lexeme -> Region.t -> (token, sym_err) result -val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result -val mk_string : lexeme -> Region.t -> token -val mk_bytes : lexeme -> Region.t -> token -val mk_constr : lexeme -> Region.t -> token -val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result -val eof : Region.t -> token +val mk_int : lexeme -> Region.t -> (token, int_err) result +val mk_nat : lexeme -> Region.t -> (token, nat_err) result +val mk_mutez : lexeme -> Region.t -> (token, int_err) result +val mk_ident : lexeme -> Region.t -> (token, ident_err) result +val mk_sym : lexeme -> Region.t -> (token, sym_err) result +val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result +val mk_string : lexeme -> Region.t -> token +val mk_verbatim : lexeme -> Region.t -> token +val mk_bytes : lexeme -> Region.t -> token +val mk_constr : lexeme -> Region.t -> token +val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result +val eof : Region.t -> token (* Predicates *) diff --git a/src/passes/1-parser/cameligo/LexToken.mll b/src/passes/1-parser/cameligo/LexToken.mll index a87150cf7..509e5fae2 100644 --- a/src/passes/1-parser/cameligo/LexToken.mll +++ b/src/passes/1-parser/cameligo/LexToken.mll @@ -62,14 +62,15 @@ type t = (* Identifiers, labels, numbers and strings *) -| Ident of string Region.reg -| Constr of string Region.reg -| Int of (string * Z.t) Region.reg -| Nat of (string * Z.t) Region.reg -| Mutez of (string * Z.t) Region.reg -| String of string Region.reg -| Bytes of (string * Hex.t) Region.reg -| Attr of string Region.reg +| Ident of string Region.reg +| Constr of string Region.reg +| Int of (string * Z.t) Region.reg +| Nat of (string * Z.t) Region.reg +| Mutez of (string * Z.t) Region.reg +| String of string Region.reg +| Verbatim of string Region.reg +| Bytes of (string * Hex.t) Region.reg +| Attr of string Region.reg (* Keywords *) @@ -111,7 +112,9 @@ let proj_token = function (* Literals *) String Region.{region; value} -> - region, sprintf "Str %s" value + region, sprintf "String %s" value +| Verbatim Region.{region; value} -> + region, sprintf "Verbatim {|%s|}" value | Bytes Region.{region; value = s,b} -> region, sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b) @@ -193,6 +196,7 @@ let to_lexeme = function (* Literals *) String s -> String.escaped s.Region.value +| Verbatim v -> String.escaped v.Region.value | Bytes b -> fst b.Region.value | Int i | Nat i @@ -405,6 +409,9 @@ and scan_constr region lexicon = parse let mk_string lexeme region = String Region.{region; value=lexeme} +let mk_verbatim lexeme region = + Verbatim Region.{region; value=lexeme} + let mk_bytes lexeme region = let norm = Str.(global_replace (regexp "_") "" lexeme) in let value = lexeme, `Hex norm diff --git a/src/passes/1-parser/cameligo/ParToken.mly b/src/passes/1-parser/cameligo/ParToken.mly index 0d32e61de..0214d56a3 100644 --- a/src/passes/1-parser/cameligo/ParToken.mly +++ b/src/passes/1-parser/cameligo/ParToken.mly @@ -5,14 +5,15 @@ (* Literals *) -%token String "" -%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes "" -%token <(string * Z.t) Region.reg> Int "" -%token <(string * Z.t) Region.reg> Nat "" -%token <(string * Z.t) Region.reg> Mutez "" -%token Ident "" -%token Constr "" -%token Attr "" +%token String "" +%token Verbatim "" +%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes "" +%token <(string * Z.t) Region.reg> Int "" +%token <(string * Z.t) Region.reg> Nat "" +%token <(string * Z.t) Region.reg> Mutez "" +%token Ident "" +%token Constr "" +%token Attr "" (* Symbols *) diff --git a/src/passes/1-parser/cameligo/Parser.mly b/src/passes/1-parser/cameligo/Parser.mly index 12352480d..7d7f32be5 100644 --- a/src/passes/1-parser/cameligo/Parser.mly +++ b/src/passes/1-parser/cameligo/Parser.mly @@ -292,6 +292,7 @@ core_pattern: | "" { PNat $1 } | "" { PBytes $1 } | "" { PString $1 } +| "" { PVerbatim $1 } | unit { PUnit $1 } | "false" { PFalse $1 } | "true" { PTrue $1 } @@ -585,6 +586,7 @@ core_expr: | "" | module_field { EVar $1 } | projection { EProj $1 } | "" { EString (String $1) } +| "" { EString (Verbatim $1) } | unit { EUnit $1 } | "false" { ELogic (BoolExpr (False $1)) } | "true" { ELogic (BoolExpr (True $1)) } diff --git a/src/passes/1-parser/cameligo/ParserLog.ml b/src/passes/1-parser/cameligo/ParserLog.ml index 6ebe07c73..f1de7c5a6 100644 --- a/src/passes/1-parser/cameligo/ParserLog.ml +++ b/src/passes/1-parser/cameligo/ParserLog.ml @@ -97,7 +97,13 @@ let print_uident state {region; value} = let print_string state {region; value} = let line = - sprintf "%s: String %s\n" + sprintf "%s: String %S\n" + (compact state region) value + in Buffer.add_string state#buffer line + +let print_verbatim state {region; value} = + let line = + sprintf "%s: Verbatim {|%s|}\n" (compact state region) value in Buffer.add_string state#buffer line @@ -280,6 +286,7 @@ and print_pattern state = function | PNat i -> print_nat state i | PBytes b -> print_bytes state b | PString s -> print_string state s +| PVerbatim v -> print_verbatim state v | PWild wild -> print_token state wild "_" | PPar {value={lpar;inside=p;rpar}; _} -> print_token state lpar "("; @@ -459,6 +466,8 @@ and print_string_expr state = function print_expr state arg2 | String s -> print_string state s +| Verbatim v -> + print_verbatim state v and print_logic_expr state = function BoolExpr e -> print_bool_expr state e @@ -693,6 +702,9 @@ and pp_pattern state = function | PString s -> pp_node state "PString"; pp_string (state#pad 1 0) s +| PVerbatim v -> + pp_node state "PVerbatim"; + pp_string (state#pad 1 0) v | PUnit {region; _} -> pp_loc_node state "PUnit" region | PFalse region -> @@ -992,6 +1004,9 @@ and pp_string_expr state = function | String s -> pp_node state "String"; pp_string (state#pad 1 0) s +| Verbatim v -> + pp_node state "Verbatim"; + pp_string (state#pad 1 0) v and pp_arith_expr state = function Add {value; region} -> diff --git a/src/passes/1-parser/cameligo/Scoping.ml b/src/passes/1-parser/cameligo/Scoping.ml index 4b44a0189..651306022 100644 --- a/src/passes/1-parser/cameligo/Scoping.ml +++ b/src/passes/1-parser/cameligo/Scoping.ml @@ -77,7 +77,8 @@ let rec vars_of_pattern env = function PConstr p -> vars_of_pconstr env p | PUnit _ | PFalse _ | PTrue _ | PInt _ | PNat _ | PBytes _ -| PString _ | PWild _ -> env +| PString _ | PVerbatim _ +| PWild _ -> env | PVar var -> if VarSet.mem var env then raise (Error (Non_linear_pattern var)) diff --git a/src/passes/1-parser/pascaligo/AST.ml b/src/passes/1-parser/pascaligo/AST.ml index fa22a7b25..076bbe0fd 100644 --- a/src/passes/1-parser/pascaligo/AST.ml +++ b/src/passes/1-parser/pascaligo/AST.ml @@ -555,8 +555,9 @@ and arith_expr = | Mutez of (Lexer.lexeme * Z.t) reg and string_expr = - Cat of cat bin_op reg -| String of Lexer.lexeme reg + Cat of cat bin_op reg +| String of Lexer.lexeme reg +| Verbatim of Lexer.lexeme reg and list_expr = ECons of cons bin_op reg @@ -726,8 +727,9 @@ and arith_expr_to_region = function | Mutez {region; _} -> region and string_expr_to_region = function - Cat {region; _} -| String {region; _} -> region + Cat {region; _} +| String {region; _} +| Verbatim {region; _} -> region and annot_expr_to_region {region; _} = region diff --git a/src/passes/1-parser/pascaligo/LexToken.mli b/src/passes/1-parser/pascaligo/LexToken.mli index be263d9a5..a217a6370 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mli +++ b/src/passes/1-parser/pascaligo/LexToken.mli @@ -36,13 +36,14 @@ type attribute = { type t = (* Literals *) - String of lexeme Region.reg -| Bytes of (lexeme * Hex.t) Region.reg -| Int of (lexeme * Z.t) Region.reg -| Nat of (lexeme * Z.t) Region.reg -| Mutez of (lexeme * Z.t) Region.reg -| Ident of lexeme Region.reg -| Constr of lexeme Region.reg + String of lexeme Region.reg +| Verbatim of lexeme Region.reg +| Bytes of (lexeme * Hex.t) Region.reg +| Int of (lexeme * Z.t) Region.reg +| Nat of (lexeme * Z.t) Region.reg +| Mutez of (lexeme * Z.t) Region.reg +| Ident of lexeme Region.reg +| Constr of lexeme Region.reg (* Symbols *) @@ -149,17 +150,18 @@ type sym_err = Invalid_symbol type attr_err = Invalid_attribute type kwd_err = Invalid_keyword -val mk_int : lexeme -> Region.t -> (token, int_err) result -val mk_nat : lexeme -> Region.t -> (token, nat_err) result -val mk_mutez : lexeme -> Region.t -> (token, int_err) result -val mk_ident : lexeme -> Region.t -> (token, ident_err) result -val mk_sym : lexeme -> Region.t -> (token, sym_err) result -val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result -val mk_string : lexeme -> Region.t -> token -val mk_bytes : lexeme -> Region.t -> token -val mk_constr : lexeme -> Region.t -> token -val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result -val eof : Region.t -> token +val mk_int : lexeme -> Region.t -> (token, int_err) result +val mk_nat : lexeme -> Region.t -> (token, nat_err) result +val mk_mutez : lexeme -> Region.t -> (token, int_err) result +val mk_ident : lexeme -> Region.t -> (token, ident_err) result +val mk_sym : lexeme -> Region.t -> (token, sym_err) result +val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result +val mk_string : lexeme -> Region.t -> token +val mk_verbatim : lexeme -> Region.t -> token +val mk_bytes : lexeme -> Region.t -> token +val mk_constr : lexeme -> Region.t -> token +val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result +val eof : Region.t -> token (* Predicates *) diff --git a/src/passes/1-parser/pascaligo/LexToken.mll b/src/passes/1-parser/pascaligo/LexToken.mll index 933db3cdb..4f28b9e71 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mll +++ b/src/passes/1-parser/pascaligo/LexToken.mll @@ -24,13 +24,14 @@ type attribute = { type t = (* Literals *) - String of lexeme Region.reg -| Bytes of (lexeme * Hex.t) Region.reg -| Int of (lexeme * Z.t) Region.reg -| Nat of (lexeme * Z.t) Region.reg -| Mutez of (lexeme * Z.t) Region.reg -| Ident of lexeme Region.reg -| Constr of lexeme Region.reg + String of lexeme Region.reg +| Verbatim of lexeme Region.reg +| Bytes of (lexeme * Hex.t) Region.reg +| Int of (lexeme * Z.t) Region.reg +| Nat of (lexeme * Z.t) Region.reg +| Mutez of (lexeme * Z.t) Region.reg +| Ident of lexeme Region.reg +| Constr of lexeme Region.reg (* Symbols *) @@ -121,7 +122,11 @@ let proj_token = function (* Literals *) String Region.{region; value} -> - region, sprintf "String %s" value + region, sprintf "String %S" value + +| Verbatim Region.{region; value} -> + region, sprintf "Verbatim {|%s|}" value + | Bytes Region.{region; value = s,b} -> region, sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b) @@ -221,6 +226,7 @@ let to_lexeme = function (* Literals *) String s -> String.escaped s.Region.value +| Verbatim v -> String.escaped v.Region.value | Bytes b -> fst b.Region.value | Int i | Nat i @@ -442,6 +448,8 @@ and scan_constr region lexicon = parse let mk_string lexeme region = String Region.{region; value=lexeme} +let mk_verbatim lexeme region = Verbatim Region.{region; value=lexeme} + let mk_bytes lexeme region = let norm = Str.(global_replace (regexp "_") "" lexeme) in let value = lexeme, `Hex norm diff --git a/src/passes/1-parser/pascaligo/ParToken.mly b/src/passes/1-parser/pascaligo/ParToken.mly index d7d587fc2..49a90168e 100644 --- a/src/passes/1-parser/pascaligo/ParToken.mly +++ b/src/passes/1-parser/pascaligo/ParToken.mly @@ -5,13 +5,14 @@ (* Literals *) -%token String "" -%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes "" -%token <(LexToken.lexeme * Z.t) Region.reg> Int "" -%token <(LexToken.lexeme * Z.t) Region.reg> Nat "" -%token <(LexToken.lexeme * Z.t) Region.reg> Mutez "" -%token Ident "" -%token Constr "" +%token String "" +%token Verbatim "" +%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes "" +%token <(LexToken.lexeme * Z.t) Region.reg> Int "" +%token <(LexToken.lexeme * Z.t) Region.reg> Nat "" +%token <(LexToken.lexeme * Z.t) Region.reg> Mutez "" +%token Ident "" +%token Constr "" (* Symbols *) diff --git a/src/passes/1-parser/pascaligo/Parser.mly b/src/passes/1-parser/pascaligo/Parser.mly index 21d9420b7..668fb29e6 100644 --- a/src/passes/1-parser/pascaligo/Parser.mly +++ b/src/passes/1-parser/pascaligo/Parser.mly @@ -849,6 +849,7 @@ core_expr: | "" { EArith (Mutez $1) } | "" | module_field { EVar $1 } | "" { EString (String $1) } +| "" { EString (Verbatim $1) } | "" { EBytes $1 } | "False" { ELogic (BoolExpr (False $1)) } | "True" { ELogic (BoolExpr (True $1)) } diff --git a/src/passes/1-parser/pascaligo/ParserLog.ml b/src/passes/1-parser/pascaligo/ParserLog.ml index 6347b07f7..ae6ca8511 100644 --- a/src/passes/1-parser/pascaligo/ParserLog.ml +++ b/src/passes/1-parser/pascaligo/ParserLog.ml @@ -592,6 +592,8 @@ and print_string_expr state = function print_expr state arg2 | String s -> print_string state s +| Verbatim v -> + print_string state v and print_list_expr state = function ECons {value = {arg1; op; arg2}; _} -> @@ -1572,6 +1574,9 @@ and pp_string_expr state = function | String s -> pp_node state "String"; pp_string (state#pad 1 0) s +| Verbatim v -> + pp_node state "Verbatim"; + pp_string (state#pad 1 0) v and pp_annotated state (expr, t_expr) = pp_expr (state#pad 2 0) expr; diff --git a/src/passes/1-parser/reasonligo/LexToken.mli b/src/passes/1-parser/reasonligo/LexToken.mli index f568b5894..43c0bb9a3 100644 --- a/src/passes/1-parser/reasonligo/LexToken.mli +++ b/src/passes/1-parser/reasonligo/LexToken.mli @@ -81,14 +81,15 @@ type t = (* Identifiers, labels, numbers and strings *) -| Ident of string Region.reg -| Constr of string Region.reg -| Int of (string * Z.t) Region.reg -| Nat of (string * Z.t) Region.reg -| Mutez of (string * Z.t) Region.reg -| String of string Region.reg -| Bytes of (string * Hex.t) Region.reg -| Attr of string Region.reg +| Ident of string Region.reg +| Constr of string Region.reg +| Int of (string * Z.t) Region.reg +| Nat of (string * Z.t) Region.reg +| Mutez of (string * Z.t) Region.reg +| String of string Region.reg +| Verbatim of string Region.reg +| Bytes of (string * Hex.t) Region.reg +| Attr of string Region.reg (* Keywords *) @@ -141,17 +142,18 @@ type sym_err = Invalid_symbol type attr_err = Invalid_attribute type kwd_err = Invalid_keyword -val mk_int : lexeme -> Region.t -> (token, int_err) result -val mk_nat : lexeme -> Region.t -> (token, nat_err) result -val mk_mutez : lexeme -> Region.t -> (token, int_err) result -val mk_ident : lexeme -> Region.t -> (token, ident_err) result -val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result -val mk_sym : lexeme -> Region.t -> (token, sym_err) result -val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result -val mk_string : lexeme -> Region.t -> token -val mk_bytes : lexeme -> Region.t -> token -val mk_constr : lexeme -> Region.t -> token -val eof : Region.t -> token +val mk_int : lexeme -> Region.t -> (token, int_err) result +val mk_nat : lexeme -> Region.t -> (token, nat_err) result +val mk_mutez : lexeme -> Region.t -> (token, int_err) result +val mk_ident : lexeme -> Region.t -> (token, ident_err) result +val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result +val mk_sym : lexeme -> Region.t -> (token, sym_err) result +val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result +val mk_string : lexeme -> Region.t -> token +val mk_verbatim : lexeme -> Region.t -> token +val mk_bytes : lexeme -> Region.t -> token +val mk_constr : lexeme -> Region.t -> token +val eof : Region.t -> token (* Predicates *) diff --git a/src/passes/1-parser/reasonligo/LexToken.mll b/src/passes/1-parser/reasonligo/LexToken.mll index a4245fb6d..9021e93c4 100644 --- a/src/passes/1-parser/reasonligo/LexToken.mll +++ b/src/passes/1-parser/reasonligo/LexToken.mll @@ -67,14 +67,15 @@ type t = (* Identifiers, labels, numbers and strings *) -| Ident of string Region.reg -| Constr of string Region.reg -| Int of (string * Z.t) Region.reg -| Nat of (string * Z.t) Region.reg -| Mutez of (string * Z.t) Region.reg -| String of string Region.reg -| Bytes of (string * Hex.t) Region.reg -| Attr of string Region.reg +| Ident of string Region.reg +| Constr of string Region.reg +| Int of (string * Z.t) Region.reg +| Nat of (string * Z.t) Region.reg +| Mutez of (string * Z.t) Region.reg +| String of string Region.reg +| Verbatim of string Region.reg +| Bytes of (string * Hex.t) Region.reg +| Attr of string Region.reg (* Keywords *) @@ -108,6 +109,8 @@ let proj_token = function String Region.{region; value} -> region, sprintf "String %s" value +| Verbatim Region.{region; value} -> + region, sprintf "Verbatim {|%s|}" value | Bytes Region.{region; value = s,b} -> region, sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b) @@ -172,6 +175,7 @@ let to_lexeme = function (* Literals *) String s -> s.Region.value +| Verbatim v -> String.escaped v.Region.value | Bytes b -> fst b.Region.value | Int i | Nat i @@ -385,6 +389,8 @@ let line_comment_start lexeme = lexeme = "//" let mk_string lexeme region = String Region.{region; value=lexeme} +let mk_verbatim lexeme region = Verbatim Region.{region; value=lexeme} + let mk_bytes lexeme region = let norm = Str.(global_replace (regexp "_") "" lexeme) in let value = lexeme, `Hex norm diff --git a/src/passes/1-parser/reasonligo/ParToken.mly b/src/passes/1-parser/reasonligo/ParToken.mly index 8191b17ec..4d7dcc913 100644 --- a/src/passes/1-parser/reasonligo/ParToken.mly +++ b/src/passes/1-parser/reasonligo/ParToken.mly @@ -5,14 +5,15 @@ (* Literals *) -%token String "" -%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes "" -%token <(string * Z.t) Region.reg> Int "" -%token <(string * Z.t) Region.reg> Nat "" -%token <(string * Z.t) Region.reg> Mutez "" -%token Ident "" -%token Constr "" -%token Attr "" +%token String "" +%token Verbatim "" +%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes "" +%token <(string * Z.t) Region.reg> Int "" +%token <(string * Z.t) Region.reg> Nat "" +%token <(string * Z.t) Region.reg> Mutez "" +%token Ident "" +%token Constr "" +%token Attr "" (* Symbols *) diff --git a/src/passes/1-parser/reasonligo/Parser.mly b/src/passes/1-parser/reasonligo/Parser.mly index 702094ecd..2d2ffcc2f 100644 --- a/src/passes/1-parser/reasonligo/Parser.mly +++ b/src/passes/1-parser/reasonligo/Parser.mly @@ -357,19 +357,20 @@ sub_pattern: | core_pattern { $1 } core_pattern: - "" { PVar $1 } -| "_" { PWild $1 } -| unit { PUnit $1 } -| "" { PInt $1 } -| "" { PNat $1 } -| "" { PBytes $1 } -| "true" { PTrue $1 } -| "false" { PFalse $1 } -| "" { PString $1 } -| par(ptuple) { PPar $1 } + "" { PVar $1 } +| "_" { PWild $1 } +| unit { PUnit $1 } +| "" { PInt $1 } +| "" { PNat $1 } +| "" { PBytes $1 } +| "true" { PTrue $1 } +| "false" { PFalse $1 } +| "" { PString $1 } +| "" { PVerbatim $1 } +| par(ptuple) { PPar $1 } | list__(sub_pattern) { PList (PListComp $1) } -| constr_pattern { PConstr $1 } -| record_pattern { PRecord $1 } +| constr_pattern { PConstr $1 } +| record_pattern { PRecord $1 } record_pattern: "{" sep_or_term_list(field_pattern,",") "}" { @@ -802,6 +803,7 @@ common_expr: | "_" { EVar {value = "_"; region = $1} } | update_record { EUpdate $1 } | "" { EString (String $1) } +| "" { EString (Verbatim $1) } | unit { EUnit $1 } | "false" { ELogic (BoolExpr (False $1)) } | "true" { ELogic (BoolExpr (True $1)) } diff --git a/src/passes/1-parser/shared/Lexer.mli b/src/passes/1-parser/shared/Lexer.mli index d3993fec8..fd94773ed 100644 --- a/src/passes/1-parser/shared/Lexer.mli +++ b/src/passes/1-parser/shared/Lexer.mli @@ -69,16 +69,17 @@ module type TOKEN = (* Injections *) - val mk_int : lexeme -> Region.t -> (token, int_err) result - val mk_nat : lexeme -> Region.t -> (token, nat_err) result - val mk_mutez : lexeme -> Region.t -> (token, int_err) result - val mk_ident : lexeme -> Region.t -> (token, ident_err) result - val mk_sym : lexeme -> Region.t -> (token, sym_err) result - val mk_string : lexeme -> Region.t -> token - val mk_bytes : lexeme -> Region.t -> token - val mk_constr : lexeme -> Region.t -> token - val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result - val eof : Region.t -> token + val mk_int : lexeme -> Region.t -> (token, int_err) result + val mk_nat : lexeme -> Region.t -> (token, nat_err) result + val mk_mutez : lexeme -> Region.t -> (token, int_err) result + val mk_ident : lexeme -> Region.t -> (token, ident_err) result + val mk_sym : lexeme -> Region.t -> (token, sym_err) result + val mk_string : lexeme -> Region.t -> token + val mk_verbatim : lexeme -> Region.t -> token + val mk_bytes : lexeme -> Region.t -> token + val mk_constr : lexeme -> Region.t -> token + val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result + val eof : Region.t -> token (* Predicates *) diff --git a/src/passes/1-parser/shared/Lexer.mll b/src/passes/1-parser/shared/Lexer.mll index dcfe3d91c..05d2ee1f1 100644 --- a/src/passes/1-parser/shared/Lexer.mll +++ b/src/passes/1-parser/shared/Lexer.mll @@ -33,16 +33,17 @@ module type TOKEN = (* Injections *) - val mk_int : lexeme -> Region.t -> (token, int_err) result - val mk_nat : lexeme -> Region.t -> (token, nat_err) result - val mk_mutez : lexeme -> Region.t -> (token, int_err) result - val mk_ident : lexeme -> Region.t -> (token, ident_err) result - val mk_sym : lexeme -> Region.t -> (token, sym_err) result - val mk_string : lexeme -> Region.t -> token - val mk_bytes : lexeme -> Region.t -> token - val mk_constr : lexeme -> Region.t -> token - val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result - val eof : Region.t -> token + val mk_int : lexeme -> Region.t -> (token, int_err) result + val mk_nat : lexeme -> Region.t -> (token, nat_err) result + val mk_mutez : lexeme -> Region.t -> (token, int_err) result + val mk_ident : lexeme -> Region.t -> (token, ident_err) result + val mk_sym : lexeme -> Region.t -> (token, sym_err) result + val mk_string : lexeme -> Region.t -> token + val mk_verbatim : lexeme -> Region.t -> token + val mk_bytes : lexeme -> Region.t -> token + val mk_constr : lexeme -> Region.t -> token + val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result + val eof : Region.t -> token (* Predicates *) @@ -111,6 +112,7 @@ module Make (Token : TOKEN) : (S with module Token = Token) = | Unexpected_character of char | Undefined_escape_sequence | Unterminated_string + | Unterminated_verbatim | Unterminated_comment of string | Non_canonical_zero | Broken_string @@ -133,6 +135,9 @@ module Make (Token : TOKEN) : (S with module Token = Token) = | Unterminated_string -> "Unterminated string.\n\ Hint: Close with double quotes." + | Unterminated_verbatim -> + "Unterminated verbatim.\n\ + Hint: Close with \"|}\"." | Unterminated_comment ending -> sprintf "Unterminated comment.\n\ Hint: Close with \"%s\"." ending @@ -179,6 +184,14 @@ module Make (Token : TOKEN) : (S with module Token = Token) = let token = Token.mk_string lexeme region in state#enqueue token + let mk_verbatim (thread, state) = + let start = thread#opening#start in + let stop = state#pos in + let region = Region.make ~start ~stop in + let lexeme = thread#to_string in + let token = Token.mk_verbatim lexeme region + in state#enqueue token + let mk_bytes bytes state buffer = let region, _, state = state#sync buffer in let token = Token.mk_bytes bytes region @@ -414,10 +427,14 @@ and scan state = parse (* String *) -| '"' { let opening, lexeme, state = state#sync lexbuf in - let thread = LexerLib.mk_thread opening lexeme in +| '"' { let opening, _, state = state#sync lexbuf in + let thread = LexerLib.mk_thread opening "" in scan_string thread state lexbuf |> mk_string } +| "{|" { let opening, _, state = state#sync lexbuf in + let thread = LexerLib.mk_thread opening "" in + scan_verbatim thread state lexbuf |> mk_verbatim } + (* Comments *) | block_comment_openings { @@ -484,7 +501,7 @@ and scan_string thread state = parse { let region, _, _ = state#sync lexbuf in fail region Invalid_character_in_string } | '"' { let _, _, state = state#sync lexbuf - in thread#push_char '"', state } + in thread, state } | esc { let _, lexeme, state = state#sync lexbuf in let thread = thread#push_string lexeme in scan_string thread state lexbuf } @@ -493,6 +510,13 @@ and scan_string thread state = parse | _ as c { let _, _, state = state#sync lexbuf in scan_string (thread#push_char c) state lexbuf } +and scan_verbatim thread state = parse +| eof { fail thread#opening Unterminated_verbatim} +| "|}" { let _, _, state = state#sync lexbuf + in thread, state } +| _ as c { let _, _, state = state#sync lexbuf in + scan_verbatim (thread#push_char c) state lexbuf } + (* Finishing a block comment (For Emacs: ("(*") The lexing of block comments must take care of diff --git a/src/passes/10-interpreter/interpreter.ml b/src/passes/10-interpreter/interpreter.ml index 85c15c6fb..c76b464f7 100644 --- a/src/passes/10-interpreter/interpreter.ml +++ b/src/passes/10-interpreter/interpreter.ml @@ -258,7 +258,7 @@ and eval_literal : Ast_typed.literal -> value result = function | Literal_int i -> ok @@ V_Ct (C_int i) | Literal_nat n -> ok @@ V_Ct (C_nat n) | Literal_timestamp i -> ok @@ V_Ct (C_timestamp i) - | Literal_string s -> ok @@ V_Ct (C_string s) + | Literal_string s -> ok @@ V_Ct (C_string (Ligo_string.extract s)) | Literal_bytes s -> ok @@ V_Ct (C_bytes s) | Literal_mutez t -> ok @@ V_Ct (C_mutez t) | Literal_address s -> ok @@ V_Ct (C_address s) diff --git a/src/passes/10-transpiler/transpiler.ml b/src/passes/10-transpiler/transpiler.ml index aa3ae5680..0a3ae7360 100644 --- a/src/passes/10-transpiler/transpiler.ml +++ b/src/passes/10-transpiler/transpiler.ml @@ -372,7 +372,7 @@ let rec transpile_literal : AST.literal -> value = fun l -> match l with | Literal_timestamp n -> D_timestamp n | Literal_mutez n -> D_mutez n | Literal_bytes s -> D_bytes s - | Literal_string s -> D_string s + | Literal_string s -> D_string (Ligo_string.extract s) | Literal_address s -> D_string s | Literal_signature s -> D_string s | Literal_key s -> D_string s diff --git a/src/passes/10-transpiler/untranspiler.ml b/src/passes/10-transpiler/untranspiler.ml index 26e2dd287..a6864bff9 100644 --- a/src/passes/10-transpiler/untranspiler.ml +++ b/src/passes/10-transpiler/untranspiler.ml @@ -92,6 +92,7 @@ let rec untranspile (v : value) (t : AST.type_expression) : AST.expression resul let%bind n = trace_strong (wrong_mini_c_value "string" v) @@ get_string v in + let n = Ligo_string.Standard n in return (E_literal (Literal_string n)) ) | TC_bytes -> ( @@ -246,6 +247,7 @@ let rec untranspile (v : value) (t : AST.type_expression) : AST.expression resul let%bind n = trace_strong (wrong_mini_c_value "lambda as string" v) @@ get_string v in + let n = Ligo_string.Standard n in return (E_literal (Literal_string n)) | T_variable _ -> fail @@ corner_case ~loc:__LOC__ "trying to untranspile at variable type" diff --git a/src/passes/2-concrete_to_imperative/cameligo.ml b/src/passes/2-concrete_to_imperative/cameligo.ml index 11a714460..6562ed20c 100644 --- a/src/passes/2-concrete_to_imperative/cameligo.ml +++ b/src/passes/2-concrete_to_imperative/cameligo.ml @@ -169,7 +169,7 @@ open Operators.Concrete_to_imperative.Cameligo let r_split = Location.r_split let get_t_string_singleton_opt = function - | Raw.TStringLiteral s -> Some (String.(sub s.value 1 ((length s.value)-2))) + | Raw.TStringLiteral s -> Some s.value | _ -> None let rec pattern_to_var : Raw.pattern -> _ = fun p -> @@ -583,11 +583,11 @@ let rec compile_expression : | EArith (Neg e) -> compile_unop "NEG" e | EString (String s) -> ( let (s , loc) = r_split s in - let s' = - let s = s in - String.(sub s 1 ((length s) - 2)) - in - return @@ e_literal ~loc (Literal_string s') + return @@ e_literal ~loc (Literal_string (Standard s)) + ) + | EString (Verbatim v) -> ( + let (v , loc) = r_split v in + return @@ e_literal ~loc (Literal_string (Verbatim v)) ) | EString (Cat c) -> let (c, loc) = r_split c in diff --git a/src/passes/2-concrete_to_imperative/pascaligo.ml b/src/passes/2-concrete_to_imperative/pascaligo.ml index 9d27ec0bc..1d5d51ef3 100644 --- a/src/passes/2-concrete_to_imperative/pascaligo.ml +++ b/src/passes/2-concrete_to_imperative/pascaligo.ml @@ -152,7 +152,7 @@ let return_statement expr = ok @@ fun expr'_opt -> | Some expr' -> ok @@ e_sequence expr expr' let get_t_string_singleton_opt = function - | Raw.TStringLiteral s -> Some (String.(sub s.value 1 ((length s.value)-2))) + | Raw.TStringLiteral s -> Some s.value | _ -> None @@ -384,11 +384,10 @@ let rec compile_expression (t:Raw.expr) : expr result = | EArith (Neg e) -> compile_unop "NEG" e | EString (String s) -> let (s , loc) = r_split s in - let s' = - (* S contains quotes *) - String.(sub s 1 (length s - 2)) - in - return @@ e_literal ~loc (Literal_string s') + return @@ e_literal ~loc (Literal_string (Standard s)) + | EString (Verbatim v) -> + let (v , loc) = r_split v in + return @@ e_literal ~loc (Literal_string (Verbatim v)) | EString (Cat bo) -> let (bo , loc) = r_split bo in let%bind sl = compile_expression bo.arg1 in diff --git a/src/passes/3-self_ast_imperative/tezos_type_annotation.ml b/src/passes/3-self_ast_imperative/tezos_type_annotation.ml index 133cdd9a6..163e061d2 100644 --- a/src/passes/3-self_ast_imperative/tezos_type_annotation.ml +++ b/src/passes/3-self_ast_imperative/tezos_type_annotation.ml @@ -17,21 +17,23 @@ let peephole_expression : expression -> expression result = fun e -> match e.expression_content with | E_ascription {anno_expr=e'; type_annotation=t} as e -> ( match (e'.expression_content , t.type_content) with - | (E_literal (Literal_string s) , T_constant (TC_key_hash)) -> return @@ E_literal (Literal_key_hash s) - | (E_literal (Literal_string s) , T_constant (TC_signature)) -> return @@ E_literal (Literal_signature s) - | (E_literal (Literal_string s) , T_constant (TC_key)) -> return @@ E_literal (Literal_key s) - | (E_literal (Literal_int i) , T_constant (TC_timestamp)) -> return @@ E_literal (Literal_timestamp i) + | (E_literal (Literal_string s) , T_constant (TC_key_hash)) -> return @@ E_literal (Literal_key_hash (Ligo_string.extract s)) + | (E_literal (Literal_string s) , T_constant (TC_signature)) -> return @@ E_literal (Literal_signature (Ligo_string.extract s)) + | (E_literal (Literal_string s) , T_constant (TC_key)) -> return @@ E_literal (Literal_key (Ligo_string.extract s)) + | (E_literal (Literal_int i) , T_constant (TC_timestamp)) -> return @@ E_literal (Literal_timestamp i) | (E_literal (Literal_string str) , T_constant (TC_timestamp)) -> + let str = Ligo_string.extract str in let%bind time = trace_option (bad_string_timestamp str e'.location) @@ Memory_proto_alpha.Protocol.Alpha_context.Timestamp.of_notation str in let itime = Z.of_int64 @@ Tezos_utils.Time.Protocol.to_seconds time in return @@ E_literal (Literal_timestamp itime) - | (E_literal (Literal_string str) , T_constant (TC_address)) -> return @@ E_literal (Literal_address str) + | (E_literal (Literal_string str) , T_constant (TC_address)) -> return @@ E_literal (Literal_address (Ligo_string.extract str)) | (E_literal (Literal_string str) , T_constant (TC_bytes)) -> ( - let%bind e' = e'_bytes str in - return e' - ) + let str = Ligo_string.extract str in + let%bind e' = e'_bytes str in + return e' + ) | _ -> return e ) | e -> return e diff --git a/src/passes/9-self_ast_typed/contract_passes.ml b/src/passes/9-self_ast_typed/contract_passes.ml index a98d466e8..764319a2c 100644 --- a/src/passes/9-self_ast_typed/contract_passes.ml +++ b/src/passes/9-self_ast_typed/contract_passes.ml @@ -59,7 +59,7 @@ let self_typing : contract_pass_data -> expression -> (bool * contract_pass_data match e.expression_content , e.type_expression with | E_constant {cons_name=C_SELF ; arguments=[entrypoint_exp]}, {type_content = T_operator (TC_contract t) ; type_meta=_} -> let%bind entrypoint = match entrypoint_exp.expression_content with - | E_literal (Literal_string ep) -> check_entrypoint_annotation_format ep entrypoint_exp + | E_literal (Literal_string ep) -> check_entrypoint_annotation_format (Ligo_string.extract ep) entrypoint_exp | _ -> fail @@ Errors.entrypoint_annotation_not_literal entrypoint_exp.location in let%bind entrypoint_t = match dat.contract_type.parameter.type_content with | T_sum cmap -> diff --git a/src/stages/1-ast_imperative/combinators.ml b/src/stages/1-ast_imperative/combinators.ml index 15a611ba2..4a4e88ed3 100644 --- a/src/stages/1-ast_imperative/combinators.ml +++ b/src/stages/1-ast_imperative/combinators.ml @@ -96,7 +96,8 @@ let e_nat_z ?loc n : expression = make_e ?loc @@ E_literal (Literal_nat n) let e_nat ?loc n : expression = e_nat_z ?loc @@ Z.of_int n let e_timestamp_z ?loc n : expression = make_e ?loc @@ E_literal (Literal_timestamp n) let e_timestamp ?loc n : expression = e_timestamp_z ?loc @@ Z.of_int n -let e_string ?loc s : expression = make_e ?loc @@ E_literal (Literal_string s) +let e_string ?loc s : expression = make_e ?loc @@ E_literal (Literal_string (Standard s)) +let e_verbatim ?loc v : expression = make_e ?loc @@ E_literal (Literal_string (Verbatim v)) let e_address ?loc s : expression = make_e ?loc @@ E_literal (Literal_address s) let e_mutez_z ?loc s : expression = make_e ?loc @@ E_literal (Literal_mutez s) let e_mutez ?loc s : expression = e_mutez_z ?loc @@ Z.of_int s diff --git a/src/stages/1-ast_imperative/combinators.mli b/src/stages/1-ast_imperative/combinators.mli index a35375610..9e1a28b5b 100644 --- a/src/stages/1-ast_imperative/combinators.mli +++ b/src/stages/1-ast_imperative/combinators.mli @@ -62,6 +62,7 @@ val e_nat : ?loc:Location.t -> int -> expression val e_timestamp : ?loc:Location.t -> int -> expression val e_bool : ?loc:Location.t -> bool -> expression val e_string : ?loc:Location.t -> string -> expression +val e_verbatim : ?loc:Location.t -> string -> expression val e_address : ?loc:Location.t -> string -> expression val e_signature : ?loc:Location.t -> string -> expression val e_key : ?loc:Location.t -> string -> expression diff --git a/src/stages/2-ast_sugar/combinators.mli b/src/stages/2-ast_sugar/combinators.mli index 95e937fab..3faebef21 100644 --- a/src/stages/2-ast_sugar/combinators.mli +++ b/src/stages/2-ast_sugar/combinators.mli @@ -53,7 +53,7 @@ val e_int : ?loc:Location.t -> Z.t -> expression val e_nat : ?loc:Location.t -> Z.t -> expression val e_timestamp : ?loc:Location.t -> Z.t -> expression val e_bool : ?loc:Location.t -> bool -> expression -val e_string : ?loc:Location.t -> string -> expression +val e_string : ?loc:Location.t -> ligo_string -> expression val e_address : ?loc:Location.t -> string -> expression val e_signature : ?loc:Location.t -> string -> expression val e_key : ?loc:Location.t -> string -> expression diff --git a/src/stages/3-ast_core/combinators.mli b/src/stages/3-ast_core/combinators.mli index 1a7ec2efa..a271991da 100644 --- a/src/stages/3-ast_core/combinators.mli +++ b/src/stages/3-ast_core/combinators.mli @@ -54,7 +54,7 @@ val e_int : ?loc:Location.t -> Z.t -> expression val e_nat : ?loc:Location.t -> Z.t -> expression val e_timestamp : ?loc:Location.t -> Z.t -> expression val e_bool : ?loc:Location.t -> bool -> expression -val e_string : ?loc:Location.t -> string -> expression +val e_string : ?loc:Location.t -> ligo_string -> expression val e_address : ?loc:Location.t -> string -> expression val e_signature : ?loc:Location.t -> string -> expression val e_key : ?loc:Location.t -> string -> expression diff --git a/src/stages/4-ast_typed/PP.ml b/src/stages/4-ast_typed/PP.ml index 23c2e3b01..5691cac65 100644 --- a/src/stages/4-ast_typed/PP.ml +++ b/src/stages/4-ast_typed/PP.ml @@ -188,7 +188,7 @@ let literal ppf (l : literal) = | Literal_nat z -> fprintf ppf "+%a" Z.pp_print z | Literal_timestamp z -> fprintf ppf "+%a" Z.pp_print z | Literal_mutez z -> fprintf ppf "%amutez" Z.pp_print z - | Literal_string s -> fprintf ppf "%S" s + | Literal_string s -> fprintf ppf "%a" Ligo_string.pp s | Literal_bytes b -> fprintf ppf "0x%a" Hex.pp (Hex.of_bytes b) | Literal_address s -> fprintf ppf "@%S" s | Literal_operation _ -> fprintf ppf "Operation(...bytes)" diff --git a/src/stages/4-ast_typed/PP_generic.ml b/src/stages/4-ast_typed/PP_generic.ml index b2a2bf179..c36fcebcb 100644 --- a/src/stages/4-ast_typed/PP_generic.ml +++ b/src/stages/4-ast_typed/PP_generic.ml @@ -15,6 +15,7 @@ let needs_parens = { int = (fun _ _ _ -> false) ; z = (fun _ _ _ -> false) ; string = (fun _ _ _ -> false) ; + ligo_string = (fun _ _ _ -> false) ; bytes = (fun _ _ _ -> false) ; unit = (fun _ _ _ -> false) ; packed_internal_operation = (fun _ _ _ -> false) ; @@ -54,6 +55,7 @@ let op ppf = { bool = (fun _visitor () b -> fprintf ppf "%s" (if b then "true" else "false")) ; z = (fun _visitor () i -> fprintf ppf "%a" Z.pp_print i) ; string = (fun _visitor () str -> fprintf ppf "\"%s\"" str) ; + ligo_string = (fun _visitor () str -> fprintf ppf "%a" Ligo_string.pp str) ; bytes = (fun _visitor () _bytes -> fprintf ppf "bytes...") ; unit = (fun _visitor () () -> fprintf ppf "()") ; packed_internal_operation = (fun _visitor () _op -> fprintf ppf "Operation(...bytes)") ; diff --git a/src/stages/4-ast_typed/combinators.mli b/src/stages/4-ast_typed/combinators.mli index 7568455d5..192939c72 100644 --- a/src/stages/4-ast_typed/combinators.mli +++ b/src/stages/4-ast_typed/combinators.mli @@ -120,7 +120,7 @@ val e_int : Z.t -> expression_content val e_nat : Z.t -> expression_content val e_mutez : Z.t -> expression_content val e_bool : bool -> environment -> expression_content -val e_string : string -> expression_content +val e_string : ligo_string -> expression_content val e_bytes : bytes -> expression_content val e_timestamp : Z.t -> expression_content val e_address : string -> expression_content @@ -140,7 +140,7 @@ val e_a_int : Z.t -> environment -> expression val e_a_nat : Z.t -> environment -> expression val e_a_mutez : Z.t -> environment -> expression val e_a_bool : bool -> environment -> expression -val e_a_string : string -> environment -> expression +val e_a_string : ligo_string -> environment -> expression val e_a_address : string -> environment -> expression val e_a_pair : expression -> expression -> environment -> expression val e_a_some : expression -> environment -> expression diff --git a/src/stages/4-ast_typed/combinators_environment.mli b/src/stages/4-ast_typed/combinators_environment.mli index 861d67083..64b325975 100644 --- a/src/stages/4-ast_typed/combinators_environment.mli +++ b/src/stages/4-ast_typed/combinators_environment.mli @@ -7,7 +7,7 @@ val e_a_empty_int : Z.t -> expression val e_a_empty_nat : Z.t -> expression val e_a_empty_mutez : Z.t -> expression val e_a_empty_bool : bool -> expression -val e_a_empty_string : string -> expression +val e_a_empty_string : ligo_string -> expression val e_a_empty_address : string -> expression val e_a_empty_pair : expression -> expression -> expression val e_a_empty_some : expression -> expression diff --git a/src/stages/4-ast_typed/types.ml b/src/stages/4-ast_typed/types.ml index 195efe054..be7a7a287 100644 --- a/src/stages/4-ast_typed/types.ml +++ b/src/stages/4-ast_typed/types.ml @@ -80,7 +80,7 @@ type literal = | Literal_nat of z | Literal_timestamp of z | Literal_mutez of z - | Literal_string of string + | Literal_string of ligo_string | Literal_bytes of bytes | Literal_address of string | Literal_signature of string diff --git a/src/stages/4-ast_typed/types_utils.ml b/src/stages/4-ast_typed/types_utils.ml index b9367fa0c..2c77f5c7d 100644 --- a/src/stages/4-ast_typed/types_utils.ml +++ b/src/stages/4-ast_typed/types_utils.ml @@ -11,6 +11,7 @@ type expression_variable = Stage_common.Types.expression_variable type type_ = Stage_common.Types.type_ type type_variable = Stage_common.Types.type_variable type z = Z.t +type ligo_string = Stage_common.Types.ligo_string type constructor' = | Constructor of string diff --git a/src/stages/common/PP.ml b/src/stages/common/PP.ml index 2e72e085e..579c2a327 100644 --- a/src/stages/common/PP.ml +++ b/src/stages/common/PP.ml @@ -138,7 +138,7 @@ let literal ppf (l : literal) = | Literal_nat z -> fprintf ppf "+%a" Z.pp_print z | Literal_timestamp z -> fprintf ppf "+%a" Z.pp_print z | Literal_mutez z -> fprintf ppf "%amutez" Z.pp_print z - | Literal_string s -> fprintf ppf "%S" s + | Literal_string s -> fprintf ppf "%a" Ligo_string.pp s | Literal_bytes b -> fprintf ppf "0x%a" Hex.pp (Hex.of_bytes b) | Literal_address s -> fprintf ppf "@%S" s | Literal_operation _ -> fprintf ppf "Operation(...bytes)" diff --git a/src/stages/common/types.ml b/src/stages/common/types.ml index b97a9357d..cfa765fa8 100644 --- a/src/stages/common/types.ml +++ b/src/stages/common/types.ml @@ -3,6 +3,7 @@ and expression_variable = expression_ Var.t type type_ and type_variable = type_ Var.t +type ligo_string = Simple_utils.Ligo_string.t type constructor' = Constructor of string type label = Label of string @@ -178,7 +179,7 @@ type literal = | Literal_nat of Z.t | Literal_timestamp of Z.t | Literal_mutez of Z.t - | Literal_string of string + | Literal_string of ligo_string | Literal_bytes of bytes | Literal_address of string | Literal_signature of string diff --git a/src/test/contracts/string.ligo b/src/test/contracts/string.ligo index c563cd16c..d0e9123e6 100644 --- a/src/test/contracts/string.ligo +++ b/src/test/contracts/string.ligo @@ -1,3 +1,4 @@ const s : string = "toto" const x : string = s ^ "bar" const y : string = "foo" ^ x +const v : string = {|deadbeef|} diff --git a/src/test/typer_tests.ml b/src/test/typer_tests.ml index 92df6e911..e4ea12df5 100644 --- a/src/test/typer_tests.ml +++ b/src/test/typer_tests.ml @@ -39,7 +39,7 @@ module TestExpressions = struct let unit () : unit result = test_expression I.(e_unit ()) O.(t_unit ()) let int () : unit result = test_expression I.(e_int (Z.of_int 32)) O.(t_int ()) let bool () : unit result = test_expression I.(e_bool true) O.(t_bool ()) - let string () : unit result = test_expression I.(e_string "s") O.(t_string ()) + let string () : unit result = test_expression I.(e_string (Standard "s")) O.(t_string ()) let bytes () : unit result = let%bind b = I.e_bytes_hex "0b" in test_expression b O.(t_bytes ()) @@ -51,7 +51,7 @@ module TestExpressions = struct let tuple () : unit result = test_expression - I.(e_record @@ LMap.of_list [(Label "0",e_int (Z.of_int 32)); (Label "1",e_string "foo")]) + I.(e_record @@ LMap.of_list [(Label "0",e_int (Z.of_int 32)); (Label "1", e_string (Standard "foo"))]) O.(make_t_ez_record [("0",t_int ()); ("1",t_string ())]) let constructor () : unit result = @@ -65,7 +65,7 @@ module TestExpressions = struct let record () : unit result = test_expression - I.(e_record @@ LMap.of_list [(Label "foo", e_int (Z.of_int 32)); (Label "bar", e_string "foo")]) + I.(e_record @@ LMap.of_list [(Label "foo", e_int (Z.of_int 32)); (Label "bar", e_string (Standard "foo"))]) O.(make_t_ez_record [("foo", t_int ()); ("bar", t_string ())])