From 07a70b464804aa6788b5121550523bc6c7e4028b Mon Sep 17 00:00:00 2001
From: Christian Rinderknecht
Date: Thu, 26 Dec 2019 13:31:54 +0100
Subject: [PATCH] Added unlexing to ReasonLIGO.

---
 src/passes/1-parser/reasonligo/.unlexer.tag |   0
 src/passes/1-parser/reasonligo/dune         |   5 +
 src/passes/1-parser/reasonligo/unlexer.ml   | 103 ++++++++++++++++++++
 3 files changed, 108 insertions(+)
 create mode 100644 src/passes/1-parser/reasonligo/.unlexer.tag
 create mode 100644 src/passes/1-parser/reasonligo/unlexer.ml

diff --git a/src/passes/1-parser/reasonligo/.unlexer.tag b/src/passes/1-parser/reasonligo/.unlexer.tag
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/passes/1-parser/reasonligo/dune b/src/passes/1-parser/reasonligo/dune
index 57fd2b818..09ce70c68 100644
--- a/src/passes/1-parser/reasonligo/dune
+++ b/src/passes/1-parser/reasonligo/dune
@@ -38,3 +38,8 @@
     ParserAPI
     ParserMain)
   (flags (:standard -open Simple_utils -open Parser_cameligo -open Parser_shared -open Parser_reasonligo)))
+
+(executable
+  (name Unlexer)
+  (libraries str)
+  (modules Unlexer))
diff --git a/src/passes/1-parser/reasonligo/unlexer.ml b/src/passes/1-parser/reasonligo/unlexer.ml
new file mode 100644
index 000000000..6628024d9
--- /dev/null
+++ b/src/passes/1-parser/reasonligo/unlexer.ml
@@ -0,0 +1,103 @@
+(** Converting the textual representation of tokens produced by Menhir
+    into concrete syntax *)
+
+(* See [ParToken.mly] for the token names matched below *)
+
+let gen_sym prefix =
+  let count = ref 0 in   (* private counter, one per generator returned *)
+  fun () -> incr count;  (* incremented before use: first call yields [prefix] followed by 1 *)
+            prefix ^ string_of_int !count
+
+let id_sym   = gen_sym "id"  (* yields id1, id2, ... ; used for "Ident" tokens *)
+and ctor_sym = gen_sym "C"   (* yields C1, C2, ... ; used for "Constr" tokens *)
+
+let concrete = function  (* maps one token name to one sample lexeme *)
+    (* Keywords *)
+
+| "Else"   -> "else"
+| "False"  -> "false"
+| "If"     -> "if"
+| "Let"    -> "let"
+| "Switch" -> "switch"
+| "Mod"    -> "mod"
+| "Or"     -> "or"
+| "True"   -> "true"
+| "Type"   -> "type"
+
+  (* Data constructors *)
+
+| "C_None"  -> "None"
+| "C_Some"  -> "Some"
+
+  (* Symbols *)
+
+| "MINUS" -> "-"
+| "PLUS"  -> "+"
+| "SLASH" -> "/"
+| "TIMES" -> "*"
+
+| "LPAR"     -> "("
+| "RPAR"     -> ")"
+| "LBRACKET" -> "["
+| "RBRACKET" -> "]"
+| "LBRACE"   -> "{"
+| "RBRACE"   -> "}"
+
+| "CAT"      -> "++"
+| "DOT"      -> "."
+| "ELLIPSIS" -> "..."
+
+| "COMMA" -> ","
+| "SEMI"  -> ";"
+| "COLON" -> ":"
+| "VBAR"  -> "|"
+
+| "WILD" -> "_"
+
+| "EQ"    -> "="
+| "EQEQ"  -> "=="
+| "NE"    -> "!="
+| "LT"    -> "<"
+| "GT"    -> ">"
+| "LE"    -> "<="
+| "GE"    -> ">="
+| "ARROW" -> "=>"
+
+| "NOT"      -> "!"
+| "BOOL_OR"  -> "||"
+| "BOOL_AND" -> "&&"
+
+  (* Literals: each literal token is rendered as one fixed sample value *)
+
+| "String" -> "\"a string\""
+| "Bytes"  -> "0xAA"
+| "Int"    -> "1"
+| "Nat"    -> "1n"
+| "Mutez"  -> "1mutez"
+| "Ident"  -> id_sym ()    (* a fresh identifier on every occurrence *)
+| "Constr" -> ctor_sym ()  (* a fresh constructor name on every occurrence *)
+
+  (* Virtual tokens: no concrete text *)
+
+| "EOF" -> ""
+
+  (* Catch-all, for completeness of open sum types: any other token name is rendered as the empty string *)
+
+| _ -> ""
+
+(* Unlexing a sentence: the result is a fresh buffer holding the lexemes separated by single spaces *)
+
+let unlex (sentence: string) : Buffer.t =
+  let tokens  = Str.split (Str.regexp " ") sentence in  (* token names, delimited by the regexp " " *)
+  let lexemes = List.map concrete tokens in             (* one lexeme per token name, in order *)
+  let buffer  = Buffer.create 31 in
+  let rec trans = function                              (* appends the lexemes to [buffer] *)
+      [] -> ()
+  |  [s] -> Buffer.add_string buffer s                  (* last lexeme: no separator after it *)
+  | s::l -> Buffer.add_string buffer (s ^ " "); trans l (* NOTE(review): an empty lexeme (EOF, unknown token) still gets its separator *)
+  in trans lexemes; buffer
+
+(* Read one line from the standard input channel, unlex it and print the result followed by a newline. *)
+
+let out = unlex (input_line stdin) |> Buffer.contents  (* NOTE(review): [End_of_file] from [input_line] is not caught here *)
+let ()  = Printf.printf "%s\n" out