diff --git a/src/passes/1-parser/cameligo/.unlexer.tag b/src/passes/1-parser/cameligo/.unlexer.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/passes/1-parser/cameligo/dune b/src/passes/1-parser/cameligo/dune index 0c9e80bf0..d882434df 100644 --- a/src/passes/1-parser/cameligo/dune +++ b/src/passes/1-parser/cameligo/dune @@ -2,15 +2,13 @@ (menhir (merge_into Parser) - (modules - ParToken Parser) + (modules ParToken Parser) (flags -la 1 --table --strict --explain --external-tokens LexToken)) (library (name parser_cameligo) (public_name ligo.parser.cameligo) - (modules - AST cameligo Parser ParserLog LexToken) + (modules AST cameligo Parser ParserLog LexToken) (libraries menhirLib parser_shared @@ -22,18 +20,18 @@ (executable (name LexerMain) - (libraries - parser_cameligo) - (modules - LexerMain) + (libraries parser_cameligo) + (modules LexerMain) (flags (:standard -open Parser_shared -open Parser_cameligo))) (executable (name ParserMain) - (libraries - parser_cameligo) + (libraries parser_cameligo) (modules - ParErr - ParserAPI - ParserMain) + ParErr ParserAPI ParserMain) (flags (:standard -open Simple_utils -open Parser_shared -open Parser_cameligo))) + +(executable + (name Unlexer) + (libraries str) + (modules Unlexer)) diff --git a/src/passes/1-parser/cameligo/unlexer.ml b/src/passes/1-parser/cameligo/unlexer.ml new file mode 100644 index 000000000..1d4ac5fef --- /dev/null +++ b/src/passes/1-parser/cameligo/unlexer.ml @@ -0,0 +1,109 @@ +(** Converting the textual representation of tokens produced by Menhir + into concrete syntax *) + +(* See [ParToken.mly] *) + (** [gen_sym prefix] builds a name generator with its own counter starting at 0: every call of the returned function increments the counter and returns [prefix] followed by the new counter value. *) +let gen_sym prefix = + let count = ref 0 in + fun () -> incr count; + prefix ^ string_of_int !count + (** Two independent generators: [id_sym] yields id1, id2, ... and [ctor_sym] yields C1, C2, ... *) +let id_sym = gen_sym "id" +and ctor_sym = gen_sym "C" + (** [concrete token] maps a token name to a lexeme: keywords and symbols to their fixed spelling, literals to a fixed sample value, Ident and Constr to a fresh name on every call, EOF and any unlisted name to the empty string. *) +let concrete = function + (* Keywords *) + +| "Begin" -> "begin" +| "Else" -> "else" +| "End" -> "end" +| "False" -> "false" +| "Fun" -> "fun" +| "If" -> "if" +| "In" -> "in" +| "Let" -> "let" +| "Match" -> "match" +| "Mod" -> 
"mod" +| "Not" -> "not" +| "Of" -> "of" +| "Or" -> "or" +| "Then" -> "then" +| "True" -> "true" +| "Type" -> "type" +| "With" -> "with" + + (* Data constructors *) + +| "C_None" -> "None" +| "C_Some" -> "Some" + + (* Symbols *) + +| "MINUS" -> "-" +| "PLUS" -> "+" +| "SLASH" -> "/" +| "TIMES" -> "*" + +| "LPAR" -> "(" +| "RPAR" -> ")" +| "LBRACKET" -> "[" +| "RBRACKET" -> "]" +| "LBRACE" -> "{" +| "RBRACE" -> "}" + +| "ARROW" -> "->" +| "CONS" -> "::" +| "CAT" -> "^" +| "DOT" -> "." + +| "COMMA" -> "," +| "SEMI" -> ";" +| "COLON" -> ":" +| "VBAR" -> "|" + +| "WILD" -> "_" + +| "EQ" -> "=" +| "NE" -> "<>" +| "LT" -> "<" +| "GT" -> ">" +| "LE" -> "<=" +| "GE" -> ">=" + +| "BOOL_OR" -> "||" +| "BOOL_AND" -> "&&" + + (* Literals *) + +| "String" -> "\"a string\"" +| "Bytes" -> "0xAA" +| "Int" -> "1" +| "Nat" -> "1n" +| "Mutez" -> "1mutez" +| "Ident" -> id_sym () +| "Constr" -> ctor_sym () + + (* Virtual tokens *) + +| "EOF" -> "" + + (* For completeness of open sum types *) + +| _ -> "" + +(* Unlexing a sentence: [unlex sentence] splits [sentence] at each space character, maps every resulting token name through [concrete] and returns a buffer holding the lexemes in order, with one space appended after every lexeme except the last. A token mapped to the empty string still contributes its separating space. *) + +let unlex (sentence: string) : Buffer.t = + let tokens = Str.split (Str.regexp " ") sentence in + let lexemes = List.map concrete tokens in + let buffer = Buffer.create 31 in + let rec trans = function + [] -> () + | [s] -> Buffer.add_string buffer s + | s::l -> Buffer.add_string buffer (s ^ " "); trans l + in trans lexemes; buffer + +(* Reading one line from the standard input channel, unlexing it and printing the result followed by a newline on the standard output. NOTE: [input_line] raises [End_of_file] when the input is empty, and that exception is not handled here. 
*) + +let out = unlex (input_line stdin) |> Buffer.contents +let () = Printf.printf "%s\n" out diff --git a/vendors/ligo-utils/simple-utils/cover.sh b/vendors/ligo-utils/simple-utils/cover.sh index 01281ef6d..b7331dd37 100755 --- a/vendors/ligo-utils/simple-utils/cover.sh +++ b/vendors/ligo-utils/simple-utils/cover.sh @@ -111,8 +111,8 @@ done usage () { cat <.mly - --lex-tokens=.mli + --par-tokens=.mly + --lex-tokens=.mli --unlexer= --ext= --dir= @@ -121,7 +121,7 @@ Usage: $(basename $0) [-h|--help] Generates in directory a set of LIGO source files with extension covering all erroneous states of the LR automaton produced by Menhir from .mly, .mly, -.mli and .msg (see script `messages.sh` for +.mli and .msg (see script messages.sh for generating the latter). The LIGO files will be numbered with their corresponding state number in the automaton. The executable reads a line on stdin of tokens and produces a line of corresponding