I added support for unlexing to CameLIGO. Fixed cover.sh.

This commit is contained in:
Christian Rinderknecht 2019-12-26 13:23:32 +01:00
parent 7c1d637226
commit ea4eb76013
4 changed files with 123 additions and 16 deletions

View File

@@ -2,15 +2,13 @@
(menhir
(merge_into Parser)
(modules
ParToken Parser)
(modules ParToken Parser)
(flags -la 1 --table --strict --explain --external-tokens LexToken))
(library
(name parser_cameligo)
(public_name ligo.parser.cameligo)
(modules
AST cameligo Parser ParserLog LexToken)
(modules AST cameligo Parser ParserLog LexToken)
(libraries
menhirLib
parser_shared
@@ -22,18 +20,18 @@
(executable
(name LexerMain)
(libraries
parser_cameligo)
(modules
LexerMain)
(libraries parser_cameligo)
(modules LexerMain)
(flags (:standard -open Parser_shared -open Parser_cameligo)))
(executable
(name ParserMain)
(libraries
parser_cameligo)
(libraries parser_cameligo)
(modules
ParErr
ParserAPI
ParserMain)
ParErr ParserAPI ParserMain)
(flags (:standard -open Simple_utils -open Parser_shared -open Parser_cameligo)))
(executable
(name Unlexer)
(libraries str)
(modules Unlexer))

View File

@@ -0,0 +1,109 @@
(** Converting the textual representation of tokens produced by Menhir
into concrete syntax *)
(* See [ParToken.mly] *)
(** [gen_sym prefix] makes a generator of fresh names: successive
    calls return [prefix ^ "1"], [prefix ^ "2"], and so on. Each
    generator owns its private counter. *)
let gen_sym prefix =
  let counter = ref 0 in
  fun () ->
    counter := !counter + 1;
    Printf.sprintf "%s%d" prefix !counter

(* Fresh-name generators for identifiers and data constructors. *)
let id_sym   = gen_sym "id"
and ctor_sym = gen_sym "C"
(* Mapping from token names, as printed by Menhir, to fixed sample
   lexemes of the concrete syntax. See [ParToken.mly]. *)
let token_to_lexeme = [
  (* Keywords *)
  "Begin", "begin";  "Else", "else";   "End", "end";
  "False", "false";  "Fun", "fun";     "If", "if";
  "In", "in";        "Let", "let";     "Match", "match";
  "Mod", "mod";      "Not", "not";     "Of", "of";
  "Or", "or";        "Then", "then";   "True", "true";
  "Type", "type";    "With", "with";
  (* Data constructors *)
  "C_None", "None";  "C_Some", "Some";
  (* Symbols *)
  "MINUS", "-";      "PLUS", "+";      "SLASH", "/";
  "TIMES", "*";      "LPAR", "(";      "RPAR", ")";
  "LBRACKET", "[";   "RBRACKET", "]";  "LBRACE", "{";
  "RBRACE", "}";     "ARROW", "->";    "CONS", "::";
  "CAT", "^";        "DOT", ".";       "COMMA", ",";
  "SEMI", ";";       "COLON", ":";     "VBAR", "|";
  "WILD", "_";       "EQ", "=";        "NE", "<>";
  "LT", "<";         "GT", ">";        "LE", "<=";
  "GE", ">=";        "BOOL_OR", "||";  "BOOL_AND", "&&";
  (* Literals, with a fixed sample value *)
  "String", "\"a string\"";
  "Bytes", "0xAA";
  "Int", "1";
  "Nat", "1n";
  "Mutez", "1mutez";
  (* Virtual tokens *)
  "EOF", ""
]

(** [concrete token] is a concrete-syntax lexeme for the token named
    [token]. Identifiers and data constructors receive fresh names
    from the generators above; any other unknown token name maps to
    ["<Unknown>"], for completeness of open sum types. *)
let concrete token =
  match token with
    "Ident"  -> id_sym ()
  | "Constr" -> ctor_sym ()
  | _ -> match List.assoc_opt token token_to_lexeme with
           Some lexeme -> lexeme
         | None -> "<Unknown>"
(* Unlexing a sentence *)

(** [unlex sentence] converts a space-separated sequence of token
    names into a buffer holding the corresponding concrete lexemes,
    separated by single spaces. Runs of spaces in the input count as
    one separator, as with [Str.split]. *)
let unlex (sentence: string) : Buffer.t =
  let buffer = Buffer.create 31 in
  let add index token =
    if index > 0 then Buffer.add_char buffer ' ';
    Buffer.add_string buffer (concrete token) in
  String.split_on_char ' ' sentence
  |> List.filter (fun token -> token <> "")
  |> List.iteri add;
  buffer
(* Read one line of token names from the standard input channel,
   unlex it, and print the resulting concrete syntax on a line. *)
let out = Buffer.contents (unlex (input_line stdin))

let () = print_endline out

View File

@@ -111,8 +111,8 @@ done
usage () {
cat <<EOF
Usage: $(basename $0) [-h|--help]
--par-tokens=<par_tolens>.mly
--lex-tokens=<par_tokens>.mli
--par-tokens=<par_tokens>.mly
--lex-tokens=<lex_tokens>.mli
--unlexer=<binary>
--ext=<extension>
--dir=<path>
@@ -121,7 +121,7 @@ Usage: $(basename $0) [-h|--help]
Generates in directory <path> a set of LIGO source files with
extension <extension> covering all erroneous states of the LR
automaton produced by Menhir from <parser>.mly, <par_tokens>.mly,
<lex_tokens>.mli and <parser>.msg (see script `messages.sh` for
<lex_tokens>.mli and <parser>.msg (see script messages.sh for
generating the latter). The LIGO files will be numbered with their
corresponding state number in the automaton. The executable <binary>
reads a line on stdin of tokens and produces a line of corresponding