I added support for unlexing to CameLIGO. Fixed cover.sh.

This commit is contained in:
Christian Rinderknecht 2019-12-26 13:23:32 +01:00
parent 7c1d637226
commit ea4eb76013
4 changed files with 123 additions and 16 deletions

View File

@ -2,15 +2,13 @@
(menhir (menhir
(merge_into Parser) (merge_into Parser)
(modules (modules ParToken Parser)
ParToken Parser)
(flags -la 1 --table --strict --explain --external-tokens LexToken)) (flags -la 1 --table --strict --explain --external-tokens LexToken))
(library (library
(name parser_cameligo) (name parser_cameligo)
(public_name ligo.parser.cameligo) (public_name ligo.parser.cameligo)
(modules (modules AST cameligo Parser ParserLog LexToken)
AST cameligo Parser ParserLog LexToken)
(libraries (libraries
menhirLib menhirLib
parser_shared parser_shared
@ -22,18 +20,18 @@
(executable (executable
(name LexerMain) (name LexerMain)
(libraries (libraries parser_cameligo)
parser_cameligo) (modules LexerMain)
(modules
LexerMain)
(flags (:standard -open Parser_shared -open Parser_cameligo))) (flags (:standard -open Parser_shared -open Parser_cameligo)))
(executable (executable
(name ParserMain) (name ParserMain)
(libraries (libraries parser_cameligo)
parser_cameligo)
(modules (modules
ParErr ParErr ParserAPI ParserMain)
ParserAPI
ParserMain)
(flags (:standard -open Simple_utils -open Parser_shared -open Parser_cameligo))) (flags (:standard -open Simple_utils -open Parser_shared -open Parser_cameligo)))
; Standalone executable built from the single module Unlexer, which
; converts a line of Menhir token names into concrete syntax. It links
; only against the [str] library.
(executable
(name Unlexer)
(libraries str)
(modules Unlexer))

View File

@ -0,0 +1,109 @@
(** Converting the textual representation of tokens produced by Menhir
into concrete syntax *)
(* See [ParToken.mly] *)
(** [gen_sym prefix] creates a generator of fresh names. Each call to
    the returned function increments a private counter and yields
    [prefix] followed by the new counter value, so the first name is
    [prefix ^ "1"]. Generators obtained from distinct calls to
    [gen_sym] do not share their counter. *)
let gen_sym prefix =
  let counter = ref 0 in
  let next () =
    counter := !counter + 1;
    Printf.sprintf "%s%d" prefix !counter
  in next
(* Two independent name generators: [id_sym] yields "id1", "id2", ...
   and [ctor_sym] yields "C1", "C2", ... *)
let id_sym = gen_sym "id"
let ctor_sym = gen_sym "C"
(** [concrete token] maps the textual name of a token (see
    [ParToken.mly]) to a lexeme. Tokens with a fixed spelling are
    looked up in a table; ["Ident"] and ["Constr"] draw a fresh name
    from [id_sym] and [ctor_sym] on every call; any other name yields
    ["<Unknown>"]. *)
let concrete =
  (* Token names whose lexeme never changes. *)
  let fixed = [
    (* Keywords *)
    "Begin", "begin";
    "Else",  "else";
    "End",   "end";
    "False", "false";
    "Fun",   "fun";
    "If",    "if";
    "In",    "in";
    "Let",   "let";
    "Match", "match";
    "Mod",   "mod";
    "Not",   "not";
    "Of",    "of";
    "Or",    "or";
    "Then",  "then";
    "True",  "true";
    "Type",  "type";
    "With",  "with";

    (* Data constructors *)
    "C_None", "None";
    "C_Some", "Some";

    (* Symbols *)
    "MINUS",    "-";
    "PLUS",     "+";
    "SLASH",    "/";
    "TIMES",    "*";
    "LPAR",     "(";
    "RPAR",     ")";
    "LBRACKET", "[";
    "RBRACKET", "]";
    "LBRACE",   "{";
    "RBRACE",   "}";
    "ARROW",    "->";
    "CONS",     "::";
    "CAT",      "^";
    "DOT",      ".";
    "COMMA",    ",";
    "SEMI",     ";";
    "COLON",    ":";
    "VBAR",     "|";
    "WILD",     "_";
    "EQ",       "=";
    "NE",       "<>";
    "LT",       "<";
    "GT",       ">";
    "LE",       "<=";
    "GE",       ">=";
    "BOOL_OR",  "||";
    "BOOL_AND", "&&";

    (* Literals with a fixed sample value *)
    "String", "\"a string\"";
    "Bytes",  "0xAA";
    "Int",    "1";
    "Nat",    "1n";
    "Mutez",  "1mutez";

    (* Virtual tokens *)
    "EOF", ""
  ] in
  fun token ->
    match token with
    (* Identifiers and constructors get a fresh name at each occurrence. *)
    | "Ident"  -> id_sym ()
    | "Constr" -> ctor_sym ()
    | _ ->
       (match List.assoc_opt token fixed with
        | Some lexeme -> lexeme
        (* For completeness of open sum types *)
        | None -> "<Unknown>")
(** [unlex sentence] splits [sentence] on single spaces into token
    names, converts each one with [concrete] (from left to right), and
    returns a buffer holding the resulting lexemes separated by one
    space, with no trailing space. *)
let unlex (sentence: string) : Buffer.t =
  let lexemes =
    List.map concrete (Str.split (Str.regexp " ") sentence) in
  let buffer = Buffer.create 31 in
  (* Write each lexeme in turn; every lexeme except the last is
     followed by a single space. *)
  let last = List.length lexemes - 1 in
  let emit index lexeme =
    Buffer.add_string buffer lexeme;
    if index < last then Buffer.add_string buffer " "
  in
  List.iteri emit lexemes;
  buffer
(* Read one line of token names from the standard input channel,
   unlex it, and print the result followed by a newline on the
   standard output channel. *)
let out = input_line stdin |> unlex |> Buffer.contents
let () = print_string out; print_char '\n'

View File

@ -111,8 +111,8 @@ done
usage () { usage () {
cat <<EOF cat <<EOF
Usage: $(basename $0) [-h|--help] Usage: $(basename $0) [-h|--help]
--par-tokens=<par_tolens>.mly --par-tokens=<par_tokens>.mly
--lex-tokens=<par_tokens>.mli --lex-tokens=<lex_tokens>.mli
--unlexer=<binary> --unlexer=<binary>
--ext=<extension> --ext=<extension>
--dir=<path> --dir=<path>
@ -121,7 +121,7 @@ Usage: $(basename $0) [-h|--help]
Generates in directory <path> a set of LIGO source files with Generates in directory <path> a set of LIGO source files with
extension <extension> covering all erroneous states of the LR extension <extension> covering all erroneous states of the LR
automaton produced by Menhir from <parser>.mly, <par_tokens>.mly, automaton produced by Menhir from <parser>.mly, <par_tokens>.mly,
<lex_tokens>.mli and <parser>.msg (see script `messages.sh` for <lex_tokens>.mli and <parser>.msg (see script messages.sh for
generating the latter). The LIGO files will be numbered with their generating the latter). The LIGO files will be numbered with their
corresponding state number in the automaton. The executable <binary> corresponding state number in the automaton. The executable <binary>
reads a line on stdin of tokens and produces a line of corresponding reads a line on stdin of tokens and produces a line of corresponding