From 531dd238a78bba45f1a2cb08822da74a9a095cd0 Mon Sep 17 00:00:00 2001 From: Christian Rinderknecht Date: Mon, 16 Dec 2019 17:37:46 +0100 Subject: [PATCH 1/2] Added unlexer for PascaLIGO. --- src/passes/1-parser/pascaligo/.Parser.mly.tag | 2 +- src/passes/1-parser/pascaligo/.unlexer.tag | 0 src/passes/1-parser/pascaligo/unlexer.ml | 121 ++++++++++++++++++ 3 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 src/passes/1-parser/pascaligo/.unlexer.tag create mode 100644 src/passes/1-parser/pascaligo/unlexer.ml diff --git a/src/passes/1-parser/pascaligo/.Parser.mly.tag b/src/passes/1-parser/pascaligo/.Parser.mly.tag index 100f7bb69..ab6790b0f 100644 --- a/src/passes/1-parser/pascaligo/.Parser.mly.tag +++ b/src/passes/1-parser/pascaligo/.Parser.mly.tag @@ -1 +1 @@ ---explain --external-tokens LexToken --base Parser ParToken.mly +--table --explain --external-tokens LexToken --base Parser ParToken.mly diff --git a/src/passes/1-parser/pascaligo/.unlexer.tag b/src/passes/1-parser/pascaligo/.unlexer.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/passes/1-parser/pascaligo/unlexer.ml b/src/passes/1-parser/pascaligo/unlexer.ml new file mode 100644 index 000000000..0ee7da436 --- /dev/null +++ b/src/passes/1-parser/pascaligo/unlexer.ml @@ -0,0 +1,121 @@ +(** Converting the textual representation of tokens produced by Menhir + into concrete syntax *) + +(* See [ParToken.mly] *) + +let gen_sym prefix = + let count = ref 0 in + fun () -> incr count; + prefix ^ string_of_int !count + +let id_sym = gen_sym "id" +and ctor_sym = gen_sym "C" + +let concrete = function + (* Keywords *) + + "And" -> "and" +| "Begin" -> "begin" +| "BigMap" -> "big_map" +| "Block" -> "block" +| "Case" -> "case" +| "Const" -> "const" +| "Contains" -> "contains" +| "Else" -> "else" +| "End" -> "end" +| "False" -> "False" +| "For" -> "for" +| "Function" -> "function" +| "From" -> "from" +| "If" -> "if" +| "In" -> "in" +| "Is" -> "is" +| "List" -> "list" +| "Map" -> "map" +| "Mod" -> "mod" +| "Nil" -> "nil" +| "Not" -> "not" +| "Of" -> "of" +| "Or" -> "or" +| "Patch" -> "patch" +| "Record" -> "record" +| "Remove" -> "remove" +| "Set" -> "set" +| "Skip" -> "skip" +| "Then" -> "then" +| "To" -> "to" +| "True" -> "True" +| "Type" -> "type" +| "Unit" -> "Unit" +| "Var" -> "var" +| "While" -> "while" +| "With" -> "with" + + (* Data constructors *) + +| "C_None" -> "None" +| "C_Some" -> "Some" + + (* Symbols *) + +| "SEMI" -> ";" +| "COMMA" -> "," +| "LPAR" -> "(" +| "RPAR" -> ")" +| "LBRACE" -> "{" +| "RBRACE" -> "}" +| "LBRACKET" -> "[" +| "RBRACKET" -> "]" +| "CONS" -> "#" +| "VBAR" -> "|" +| "ARROW" -> "->" +| "ASS" -> ":=" +| "EQ" -> "=" +| "COLON" -> ":" +| "LT" -> "<" +| "LE" -> "<=" +| "GT" -> ">" +| "GE" -> ">=" +| "NE" -> "=/=" +| "PLUS" -> "+" +| "MINUS" -> " -" +| "SLASH" -> "/" +| "TIMES" -> "*" +| "DOT" -> "." +| "WILD" -> "_" +| "CAT" -> "^" + + (* Literals *) + +| "String" -> "\"a string\"" +| "Bytes" -> "0xAA" +| "Int" -> "1" +| "Nat" -> "1n" +| "Mutez" -> "1mutez" +| "Ident" -> id_sym () +| "Constr" -> ctor_sym () + + (* Virtual tokens *) + +| "EOF" -> "" + + (* For completeness of open sum types *) + +| _ -> "" + +(* Unlexing a sentence *) + +let unlex (sentence: string) : Buffer.t = + let tokens = Str.split (Str.regexp " ") sentence in + let lexemes = List.map concrete tokens in + let buffer = Buffer.create 31 in + let rec trans = function + [] -> () + | [s] -> Buffer.add_string buffer s + | s::l -> Buffer.add_string buffer (s ^ " "); trans l + in trans lexemes; buffer + +(* Reading one line from the standard input channel and unlex it. *) + +let out = unlex (input_line stdin) |> Buffer.contents +let () = Printf.printf "%s\n" out From 757b0da78c534141d10061a834ac7615ca442b08 Mon Sep 17 00:00:00 2001 From: Christian Rinderknecht Date: Tue, 17 Dec 2019 14:56:16 +0100 Subject: [PATCH 2/2] Removed the open type [Error.t] (less [assert false]). I also had to remove the keywords [Down], [Fail] and [Step] in PascaLIGO that made a mysterious and unwanted come back. (I did not bother with [git blame]). --- src/passes/1-parser/cameligo/.links | 1 - src/passes/1-parser/cameligo/ParserMain.ml | 11 +++--- src/passes/1-parser/pascaligo/.links | 1 - src/passes/1-parser/pascaligo/LexToken.mli | 3 -- src/passes/1-parser/pascaligo/LexToken.mll | 15 ------- src/passes/1-parser/pascaligo/ParserMain.ml | 11 +++--- src/passes/1-parser/reasonligo/.links | 1 - src/passes/1-parser/reasonligo/ParserMain.ml | 11 +++--- src/passes/1-parser/shared/Error.mli | 3 -- src/passes/1-parser/shared/Lexer.mli | 6 ++- src/passes/1-parser/shared/Lexer.mll | 41 ++++++++++---------- src/passes/1-parser/shared/dune | 9 +---- 12 files changed, 42 insertions(+), 71 deletions(-) delete mode 100644 src/passes/1-parser/shared/Error.mli diff --git a/src/passes/1-parser/cameligo/.links b/src/passes/1-parser/cameligo/.links index f0fdfb646..eca6c8680 100644 --- a/src/passes/1-parser/cameligo/.links +++ b/src/passes/1-parser/cameligo/.links @@ -6,7 +6,6 @@ $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml ../shared/Lexer.mli ../shared/Lexer.mll -../shared/Error.mli ../shared/EvalOpt.ml ../shared/EvalOpt.mli ../shared/FQueue.ml diff --git a/src/passes/1-parser/cameligo/ParserMain.ml b/src/passes/1-parser/cameligo/ParserMain.ml index e683b15d1..e1e35850b 100644 --- a/src/passes/1-parser/cameligo/ParserMain.ml +++ b/src/passes/1-parser/cameligo/ParserMain.ml @@ -25,15 +25,14 @@ let () = Printexc.record_backtrace true let external_ text = Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; -type Error.t += ParseError +type error = SyntaxError let error_to_string = function - ParseError -> "Syntax error.\n" -| _ -> assert false + SyntaxError -> "Syntax error.\n" let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value in - let reg = region#to_string ~file ~offsets mode in + let msg = error_to_string value + and reg = region#to_string ~file ~offsets mode in Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg) (** {1 Preprocessing the input source and opening the input channels} *) @@ -126,7 +125,7 @@ let () = options#mode err ~file | Parser.Error -> let region = get_last () in - let error = Region.{region; value=ParseError} in + let error = Region.{region; value=SyntaxError} in let () = close_all () in print_error ~offsets:options#offsets options#mode error ~file diff --git a/src/passes/1-parser/pascaligo/.links b/src/passes/1-parser/pascaligo/.links index f0fdfb646..eca6c8680 100644 --- a/src/passes/1-parser/pascaligo/.links +++ b/src/passes/1-parser/pascaligo/.links @@ -6,7 +6,6 @@ $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml ../shared/Lexer.mli ../shared/Lexer.mll -../shared/Error.mli ../shared/EvalOpt.ml ../shared/EvalOpt.mli ../shared/FQueue.ml diff --git a/src/passes/1-parser/pascaligo/LexToken.mli b/src/passes/1-parser/pascaligo/LexToken.mli index b1865faad..aa906f8d8 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mli +++ b/src/passes/1-parser/pascaligo/LexToken.mli @@ -77,10 +77,8 @@ type t = | Case of Region.t (* "case" *) | Const of Region.t (* "const" *) | Contains of Region.t (* "contains" *) -| Down of Region.t (* "down" *) | Else of Region.t (* "else" *) | End of Region.t (* "end" *) -| Fail of Region.t (* "fail" *) | False of Region.t (* "False" *) | For of Region.t (* "for" *) | From of Region.t (* "from" *) @@ -100,7 +98,6 @@ type t = | Remove of Region.t (* "remove" *) | Set of Region.t (* "set" *) | Skip of Region.t (* "skip" *) -| Step of Region.t (* "step" *) | Then of Region.t (* "then" *) | To of Region.t (* "to" *) | True of Region.t (* "True" *) diff --git a/src/passes/1-parser/pascaligo/LexToken.mll b/src/passes/1-parser/pascaligo/LexToken.mll index 67d2c0ed9..16f4dd96a 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mll +++ b/src/passes/1-parser/pascaligo/LexToken.mll @@ -75,10 +75,8 @@ type t = | Case of Region.t (* "case" *) | Const of Region.t (* "const" *) | Contains of Region.t (* "contains" *) -| Down of Region.t (* "down" *) | Else of Region.t (* "else" *) | End of Region.t (* "end" *) -| Fail of Region.t (* "fail" *) | False of Region.t (* "False" *) | For of Region.t (* "for" *) | From of Region.t (* "from" *) @@ -98,7 +96,6 @@ type t = | Remove of Region.t (* "remove" *) | Set of Region.t (* "set" *) | Skip of Region.t (* "skip" *) -| Step of Region.t (* "step" *) | Then of Region.t (* "then" *) | To of Region.t (* "to" *) | True of Region.t (* "True" *) @@ -184,10 +181,8 @@ let proj_token = function | Case region -> region, "Case" | Const region -> region, "Const" | Contains region -> region, "Contains" -| Down region -> region, "Down" | Else region -> region, "Else" | End region -> region, "End" -| Fail region -> region, "Fail" | False region -> region, "False" | For region -> region, "For" | From region -> region, "From" @@ -207,7 +202,6 @@ let proj_token = function | Remove region -> region, "Remove" | Set region -> region, "Set" | Skip region -> region, "Skip" -| Step region -> region, "Step" | Then region -> region, "Then" | To region -> region, "To" | True region -> region, "True" @@ -276,10 +270,8 @@ let to_lexeme = function | Case _ -> "case" | Const _ -> "const" | Contains _ -> "contains" -| Down _ -> "down" | Else _ -> "else" | End _ -> "end" -| Fail _ -> "fail" | False _ -> "False" | For _ -> "for" | From _ -> "from" @@ -299,7 +291,6 @@ let to_lexeme = function | Remove _ -> "remove" | Set _ -> "set" | Skip _ -> "skip" -| Step _ -> "step" | Then _ -> "then" | To _ -> "to" | True _ -> "True" @@ -336,13 +327,11 @@ let keywords = [ (fun reg -> Case reg); (fun reg -> Const reg); (fun reg -> Contains reg); - (fun reg -> Down reg); (fun reg -> Else reg); (fun reg -> End reg); (fun reg -> For reg); (fun reg -> From reg); (fun reg -> Function reg); - (fun reg -> Fail reg); (fun reg -> False reg); (fun reg -> If reg); (fun reg -> In reg); @@ -360,7 +349,6 @@ let keywords = [ (fun reg -> Remove reg); (fun reg -> Set reg); (fun reg -> Skip reg); - (fun reg -> Step reg); (fun reg -> Then reg); (fun reg -> To reg); (fun reg -> True reg); @@ -560,10 +548,8 @@ let is_kwd = function | Case _ | Const _ | Contains _ -| Down _ | Else _ | End _ -| Fail _ | False _ | For _ | From _ @@ -583,7 +569,6 @@ let is_kwd = function | Remove _ | Set _ | Skip _ -| Step _ | Then _ | To _ | True _ diff --git a/src/passes/1-parser/pascaligo/ParserMain.ml b/src/passes/1-parser/pascaligo/ParserMain.ml index 130cfbb23..295d460d8 100644 --- a/src/passes/1-parser/pascaligo/ParserMain.ml +++ b/src/passes/1-parser/pascaligo/ParserMain.ml @@ -25,15 +25,14 @@ let () = Printexc.record_backtrace true let external_ text = Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; -type Error.t += ParseError +type error = SyntaxError let error_to_string = function - ParseError -> "Syntax error.\n" -| _ -> assert false + SyntaxError -> "Syntax error.\n" let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value in - let reg = region#to_string ~file ~offsets mode in + let msg = error_to_string value + and reg = region#to_string ~file ~offsets mode in Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg) (** {1 Preprocessing the input source and opening the input channels} *) @@ -126,7 +125,7 @@ let () = options#mode err ~file | Parser.Error -> let region = get_last () in - let error = Region.{region; value=ParseError} in + let error = Region.{region; value=SyntaxError} in let () = close_all () in print_error ~offsets:options#offsets options#mode error ~file diff --git a/src/passes/1-parser/reasonligo/.links b/src/passes/1-parser/reasonligo/.links index 09ca1c65f..e827ae13e 100644 --- a/src/passes/1-parser/reasonligo/.links +++ b/src/passes/1-parser/reasonligo/.links @@ -6,7 +6,6 @@ $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml ../shared/Lexer.mli ../shared/Lexer.mll -../shared/Error.mli ../shared/EvalOpt.ml ../shared/EvalOpt.mli ../shared/FQueue.ml diff --git a/src/passes/1-parser/reasonligo/ParserMain.ml b/src/passes/1-parser/reasonligo/ParserMain.ml index 30fd040dd..f4e8058cd 100644 --- a/src/passes/1-parser/reasonligo/ParserMain.ml +++ b/src/passes/1-parser/reasonligo/ParserMain.ml @@ -25,15 +25,14 @@ let () = Printexc.record_backtrace true let external_ text = Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; -type Error.t += ParseError +type error = SyntaxError let error_to_string = function - ParseError -> "Syntax error.\n" -| _ -> assert false + SyntaxError -> "Syntax error.\n" let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value in - let reg = region#to_string ~file ~offsets mode in + let msg = error_to_string value + and reg = region#to_string ~file ~offsets mode in Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg) (** {1 Preprocessing the input source and opening the input channels} *) @@ -126,7 +125,7 @@ let () = options#mode err ~file | Parser.Error -> let region = get_last () in - let error = Region.{region; value=ParseError} in + let error = Region.{region; value=SyntaxError} in let () = close_all () in print_error ~offsets:options#offsets options#mode error ~file diff --git a/src/passes/1-parser/shared/Error.mli b/src/passes/1-parser/shared/Error.mli deleted file mode 100644 index 19c1ce4c9..000000000 --- a/src/passes/1-parser/shared/Error.mli +++ /dev/null @@ -1,3 +0,0 @@ -type t = .. - -type error = t diff --git a/src/passes/1-parser/shared/Lexer.mli b/src/passes/1-parser/shared/Lexer.mli index cc0359998..50754e45f 100644 --- a/src/passes/1-parser/shared/Lexer.mli +++ b/src/passes/1-parser/shared/Lexer.mli @@ -136,11 +136,13 @@ module type S = (* Error reporting *) - exception Error of Error.t Region.reg + type error + + exception Error of error Region.reg val print_error : ?offsets:bool -> [`Byte | `Point] -> - Error.t Region.reg -> file:bool -> unit + error Region.reg -> file:bool -> unit end diff --git a/src/passes/1-parser/shared/Lexer.mll b/src/passes/1-parser/shared/Lexer.mll index 41d95b432..1e8e382fa 100644 --- a/src/passes/1-parser/shared/Lexer.mll +++ b/src/passes/1-parser/shared/Lexer.mll @@ -159,10 +159,11 @@ module type S = sig (* Error reporting *) - exception Error of Error.t Region.reg + type error + exception Error of error Region.reg val print_error : ?offsets:bool -> [`Byte | `Point] -> - Error.t Region.reg -> file:bool -> unit + error Region.reg -> file:bool -> unit end (* The functorised interface @@ -330,22 +331,23 @@ module Make (Token: TOKEN) : (S with module Token = Token) = (* ERRORS *) - type Error.t += Invalid_utf8_sequence - type Error.t += Unexpected_character of char - type Error.t += Undefined_escape_sequence - type Error.t += Missing_break - type Error.t += Unterminated_string - type Error.t += Unterminated_integer - type Error.t += Odd_lengthed_bytes - type Error.t += Unterminated_comment - type Error.t += Orphan_minus - type Error.t += Non_canonical_zero - type Error.t += Negative_byte_sequence - type Error.t += Broken_string - type Error.t += Invalid_character_in_string - type Error.t += Reserved_name - type Error.t += Invalid_symbol - type Error.t += Invalid_natural + type error = + Invalid_utf8_sequence + | Unexpected_character of char + | Undefined_escape_sequence + | Missing_break + | Unterminated_string + | Unterminated_integer + | Odd_lengthed_bytes + | Unterminated_comment + | Orphan_minus + | Non_canonical_zero + | Negative_byte_sequence + | Broken_string + | Invalid_character_in_string + | Reserved_name + | Invalid_symbol + | Invalid_natural let error_to_string = function Invalid_utf8_sequence -> @@ -393,9 +395,8 @@ module Make (Token: TOKEN) : (S with module Token = Token) = Hint: Check the LIGO syntax you use.\n" | Invalid_natural -> "Invalid natural." - | _ -> assert false - exception Error of Error.t Region.reg + exception Error of error Region.reg let print_error ?(offsets=true) mode Region.{region; value} ~file = let msg = error_to_string value in diff --git a/src/passes/1-parser/shared/dune b/src/passes/1-parser/shared/dune index 3d763b1df..2dafdbd17 100644 --- a/src/passes/1-parser/shared/dune +++ b/src/passes/1-parser/shared/dune @@ -7,20 +7,15 @@ simple-utils uutf getopt - zarith - ) + zarith) (modules - Error Lexer LexerLog Utils Markup FQueue EvalOpt - Version - ) - (modules_without_implementation Error) -) + Version)) (rule (targets Version.ml)