type pre_token = { name : string ; pattern : string ; } let make name pattern = { name ; pattern } let keyword = fun k -> let regexp = Str.regexp "[^0-9a-zA-Z]" in let constructor_name = Str.global_replace regexp "_" @@ String.uppercase_ascii k in make constructor_name k let symbol = fun sym name -> make name sym module Print_mly = struct open Format let token = fun ppf pre_token -> fprintf ppf "%%token %s" pre_token.name let tokens = fun ppf tokens -> let open Simple_utils.PP_helpers in fprintf ppf "%%token EOF\n" ; fprintf ppf "%%token INT\n" ; fprintf ppf "%%token NAT\n" ; fprintf ppf "%%token TZ\n" ; fprintf ppf "%%token STRING\n" ; fprintf ppf "%%token NAME\n" ; fprintf ppf "%%token CONSTRUCTOR_NAME\n" ; fprintf ppf "\n%a\n\n" (list_sep token (const "\n")) tokens ; fprintf ppf "%%%%\n" end module Print_mll = struct open Format let token = fun ppf {name;pattern} -> fprintf ppf "| \"%s\" { %s }" pattern name let pre = {pre|{ open Token exception Error of string exception Unexpected_character of string } (* This rule analyzes a single line and turns it into a stream of tokens. *) rule token = parse (* | "//" ([^ '\n']* ) (['\n' '\r']+) { Lexing.new_line lexbuf ; token lexbuf } *) | ('\r'? '\n' '\r'?) { Lexing.new_line lexbuf; token lexbuf } | '"' { string "" lexbuf } | [' ' '\t'] { token lexbuf } | (['0'-'9']+ as i) 'p' { NAT (int_of_string i) } | (['0'-'9']+ as n) '.' (['0'-'9']['0'-'9'] as d) "tz" { TZ ((int_of_string n) * 100 + (int_of_string d)) } | (['0'-'9']+ as i) { INT (int_of_string i) } |pre} let post = {post| | (['a'-'z''_']['a'-'z''A'-'Z''0'-'9''_']*) as v { NAME v } | (['A'-'Z']['a'-'z''A'-'Z''0'-'9''_']*) as v { CONSTRUCTOR_NAME v } | eof { EOF } | "(*" { comment 1 lexbuf } | _ { raise (Unexpected_character (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) } and string s = parse | "\\\"" { string (s ^ "\"") lexbuf } | "\\\\" { string (s ^ "\\") lexbuf } | '"' { STRING s } | eof { raise (Unexpected_character "missing string terminator") } | _ as c { string (s ^ (String.make 1 c)) lexbuf } and comment n = parse | "*)" { if n = 1 then token lexbuf else comment (n - 1) lexbuf } | "(*" { comment (n + 1) lexbuf } | '"' ( [^ '"' '\\'] | ( '\\' [^ '"'] ) ) '"' { comment n lexbuf } | eof { raise (Unexpected_character "missing comment terminator") } | ('\r'? '\n' '\r'?) { Lexing.new_line lexbuf; comment n lexbuf } | _ { comment n lexbuf } |post} let tokens = fun ppf tokens -> let open Simple_utils.PP_helpers in fprintf ppf "%s%a\n%s" pre (list_sep token (const "\n")) tokens post end module Print_ml = struct open Format let token = fun ppf {name} -> fprintf ppf " | %s -> \"%s\"" name name let pre = {pre|include Token_type let to_string : token -> string = function | STRING _ -> "STRING" | NAME _ -> "NAME s" | CONSTRUCTOR_NAME _ -> "CONSTRUCTOR_NAME s" | INT _ -> "INT n" | NAT _ -> "NAT n" | TZ _ -> "TZ n" | EOF -> "EOF" |pre} let tokens = fun ppf tokens -> let open Simple_utils.PP_helpers in fprintf ppf "%s%a" pre (list_sep token (const "\n")) tokens end let tokens = [ keyword "let%init" ; keyword "let%entry" ; keyword "let" ; keyword "type" ; keyword "in" ; keyword "if" ; keyword "then" ; keyword "else" ; (* keyword "block" ; * keyword "for" ; * keyword "const" ; *) keyword "fun" ; keyword "match" ; keyword "with" ; symbol "()" "UNIT" ; symbol "+" "PLUS" ; symbol "~" "TILDE" ; symbol "->" "ARROW" ; symbol "<-" "LEFT_ARROW" ; symbol "<=" "LE" ; symbol "<>" "UNEQUAL" ; symbol "<" "LT" ; symbol ">" "GT" ; symbol "-" "MINUS" ; symbol "*" "TIMES" ; symbol "/" "DIV" ; symbol "=" "EQUAL" ; symbol "|" "VBAR" ; symbol "[" "LSQUARE" ; symbol "]" "RSQUARE" ; symbol "(" "LPAREN" ; symbol ")" "RPAREN" ; symbol "{" "LBRACKET" ; symbol "}" "RBRACKET" ; symbol ";;" "DOUBLE_SEMICOLON" ; symbol ";" "SEMICOLON" ; symbol "::" "DOUBLE_COLON" ; symbol ":" "COLON" ; symbol "," "COMMA" ; symbol "." "DOT" ; ] let () = let argn = Array.length Sys.argv in if argn = 1 then exit 1 ; let arg = Sys.argv.(1) in let open Simple_utils.PP_helpers in match arg with | "mll" -> ( Format.printf "%a@.%a\n" comment "Generated .mll" Print_mll.tokens tokens ) | "mly" -> ( Format.printf "%a@.%a\n" comment "Generated .mly" Print_mly.tokens tokens ) | "ml" -> ( Format.printf "%a@.%a\n" comment "Generated .ml" Print_ml.tokens tokens ) | _ -> exit 1