From 07a70b464804aa6788b5121550523bc6c7e4028b Mon Sep 17 00:00:00 2001
From: Christian Rinderknecht <Christian.Rinderknecht@tezcore.com>
Date: Thu, 26 Dec 2019 13:31:54 +0100
Subject: [PATCH] Added unlexing to ReasonLIGO.

---
 src/passes/1-parser/reasonligo/.unlexer.tag |   0
 src/passes/1-parser/reasonligo/dune         |   5 +
 src/passes/1-parser/reasonligo/unlexer.ml   | 103 ++++++++++++++++++++
 3 files changed, 108 insertions(+)
 create mode 100644 src/passes/1-parser/reasonligo/.unlexer.tag
 create mode 100644 src/passes/1-parser/reasonligo/unlexer.ml
diff --git a/src/passes/1-parser/reasonligo/.unlexer.tag b/src/passes/1-parser/reasonligo/.unlexer.tag
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/passes/1-parser/reasonligo/dune b/src/passes/1-parser/reasonligo/dune
index 57fd2b818..09ce70c68 100644
--- a/src/passes/1-parser/reasonligo/dune
+++ b/src/passes/1-parser/reasonligo/dune
@@ -38,3 +38,8 @@
     ParserAPI
     ParserMain)
   (flags (:standard -open Simple_utils -open Parser_cameligo -open Parser_shared -open Parser_reasonligo)))
+
+(executable          ; standalone tool built from unlexer.ml
+  (name Unlexer)
+  (libraries str)    ; unlexer.ml calls Str.split and Str.regexp
+  (modules Unlexer))
diff --git a/src/passes/1-parser/reasonligo/unlexer.ml b/src/passes/1-parser/reasonligo/unlexer.ml
new file mode 100644
index 000000000..6628024d9
--- /dev/null
+++ b/src/passes/1-parser/reasonligo/unlexer.ml
@@ -0,0 +1,103 @@
+(** Converting the textual representation of tokens produced by Menhir
+    into concrete syntax *)
+
+(* See [ParToken.mly] *)
+
+(* [gen_sym prefix] is a function whose successive calls return [prefix]
+   followed by 1, 2, 3, ...; each generator owns its own counter. *)
+let gen_sym prefix =
+  let n = ref 0 in fun () -> incr n; Printf.sprintf "%s%d" prefix !n
+
+(* Fresh-name generators for "Ident" (id1, id2, ...) and "Constr" (C1, C2, ...) *)
+let id_sym, ctor_sym = gen_sym "id", gen_sym "C"
+
+(* [concrete token] is a sample ReasonLIGO lexeme for the Menhir token name
+   [token].  Most names have a fixed lexeme, listed in [fixed]; "Ident" and
+   "Constr" instead draw a fresh name from [id_sym] and [ctor_sym] at every
+   call, and any name not listed yields "<Unknown>". *)
+let concrete token =
+  let fixed = [
+    (* Keywords *)
+    "Else",     "else";
+    "False",    "false";
+    "If",       "if";
+    "Let",      "let";
+    "Switch",   "switch";
+    "Mod",      "mod";
+    "Or",       "or";
+    "True",     "true";
+    "Type",     "type";
+
+    (* Data constructors *)
+    "C_None",   "None";
+    "C_Some",   "Some";
+
+    (* Symbols *)
+    "MINUS",    "-";
+    "PLUS",     "+";
+    "SLASH",    "/";
+    "TIMES",    "*";
+
+    "LPAR",     "(";
+    "RPAR",     ")";
+    "LBRACKET", "[";
+    "RBRACKET", "]";
+    "LBRACE",   "{";
+    "RBRACE",   "}";
+
+    "CAT",      "++";
+    "DOT",      ".";
+    "ELLIPSIS", "...";
+    "COMMA",    ",";
+    "SEMI",     ";";
+    "COLON",    ":";
+    "VBAR",     "|";
+    "WILD",     "_";
+    "EQ",       "=";
+    "EQEQ",     "==";
+    "NE",       "!=";
+    "LT",       "<";
+    "GT",       ">";
+    "LE",       "<=";
+    "GE",       ">=";
+    "ARROW",    "=>";
+    "NOT",      "!";
+    "BOOL_OR",  "||";
+    "BOOL_AND", "&&";
+
+    (* Literals with a constant lexeme *)
+    "String",   "\"a string\"";
+    "Bytes",    "0xAA";
+    "Int",      "1";
+    "Nat",      "1n";
+    "Mutez",    "1mutez";
+
+    (* Virtual tokens *)
+    "EOF",      "";
+  ] in
+  match token with
+    (* Names: a new one at each occurrence *)
+  | "Ident"  -> id_sym ()
+  | "Constr" -> ctor_sym ()
+  | _ ->
+      (* Everything else is looked up; unlisted names get a placeholder *)
+      match List.assoc_opt token fixed with
+      | Some lexeme -> lexeme
+      | None        -> "<Unknown>"
+
+(* Unlexing a sentence.
+
+   [unlex sentence] splits [sentence] into token names on runs of blanks
+   (spaces or tabs), maps each name through [concrete], in order, and returns
+   the lexemes, separated by single spaces, in a fresh buffer.  Splitting on
+   runs rather than on each single blank keeps repeated blanks from yielding
+   empty token names, which [concrete] would render as "<Unknown>". *)
+let unlex (sentence: string) : Buffer.t =
+  let tokens = Str.split (Str.regexp "[ \t]+") sentence in
+  let buffer = Buffer.create 31 in
+  Buffer.add_string buffer (String.concat " " (List.map concrete tokens)); buffer
+
+(* Entry point: read one line of token names from standard input, unlex it and
+   print the result on its own line ([End_of_file] escapes on empty input). *)
+let out = input_line stdin |> unlex |> Buffer.contents
+let ()  = print_endline out