186 lines
4.8 KiB
OCaml
186 lines
4.8 KiB
OCaml
|
type pre_token = {
|
||
|
name : string ;
|
||
|
pattern : string ;
|
||
|
}
|
||
|
|
||
|
let make name pattern = { name ; pattern }
|
||
|
|
||
|
let keyword = fun k ->
|
||
|
let regexp = Str.regexp "[^0-9a-zA-Z]" in
|
||
|
let constructor_name =
|
||
|
Str.global_replace regexp "_"
|
||
|
@@ String.uppercase_ascii k
|
||
|
in
|
||
|
make constructor_name k
|
||
|
let symbol = fun sym name -> make name sym
|
||
|
|
||
|
module Print_mly = struct
|
||
|
open Format
|
||
|
|
||
|
let token = fun ppf pre_token ->
|
||
|
fprintf ppf "%%token %s" pre_token.name
|
||
|
|
||
|
let tokens = fun ppf tokens ->
|
||
|
let open Simple_utils.PP_helpers in
|
||
|
fprintf ppf "%%token EOF\n" ;
|
||
|
fprintf ppf "%%token <int> INT\n" ;
|
||
|
fprintf ppf "%%token <int> NAT\n" ;
|
||
|
fprintf ppf "%%token <int> TZ\n" ;
|
||
|
fprintf ppf "%%token <string> STRING\n" ;
|
||
|
fprintf ppf "%%token <string> NAME\n" ;
|
||
|
fprintf ppf "%%token <string> CONSTRUCTOR_NAME\n" ;
|
||
|
fprintf ppf "\n%a\n\n" (list_sep token (const "\n")) tokens ;
|
||
|
fprintf ppf "%%%%\n"
|
||
|
end
|
||
|
|
||
|
module Print_mll = struct
|
||
|
open Format
|
||
|
|
||
|
let token = fun ppf {name;pattern} ->
|
||
|
fprintf ppf "| \"%s\" { %s }" pattern name
|
||
|
|
||
|
let pre =
|
||
|
{pre|{
|
||
|
open Token
|
||
|
|
||
|
exception Error of string
|
||
|
exception Unexpected_character of string
|
||
|
}
|
||
|
|
||
|
(* This rule analyzes a single line and turns it into a stream of
|
||
|
tokens. *)
|
||
|
|
||
|
rule token = parse
|
||
|
(*
|
||
|
| "//" ([^ '\n']* ) (['\n' '\r']+)
|
||
|
{ Lexing.new_line lexbuf ; token lexbuf }
|
||
|
*)
|
||
|
| ('\r'? '\n' '\r'?)
|
||
|
{ Lexing.new_line lexbuf; token lexbuf }
|
||
|
| '"' { string "" lexbuf }
|
||
|
| [' ' '\t']
|
||
|
{ token lexbuf }
|
||
|
| (['0'-'9']+ as i) 'p'
|
||
|
{ NAT (int_of_string i) }
|
||
|
| (['0'-'9']+ as n) '.' (['0'-'9']['0'-'9'] as d) "tz" { TZ ((int_of_string n) * 100 + (int_of_string d)) }
|
||
|
| (['0'-'9']+ as i)
|
||
|
{ INT (int_of_string i) }
|
||
|
|pre}
|
||
|
let post =
|
||
|
{post|
|
||
|
| (['a'-'z''_']['a'-'z''A'-'Z''0'-'9''_']*) as v
|
||
|
{ NAME v }
|
||
|
| (['A'-'Z']['a'-'z''A'-'Z''0'-'9''_']*) as v
|
||
|
{ CONSTRUCTOR_NAME v }
|
||
|
| eof { EOF }
|
||
|
| "(*" { comment 1 lexbuf }
|
||
|
| _
|
||
|
{ raise (Unexpected_character (Printf.sprintf "At offset %d: unexpected character.\n" (Lexing.lexeme_start lexbuf))) }
|
||
|
|
||
|
and string s = parse
|
||
|
| "\\\"" { string (s ^ "\"") lexbuf }
|
||
|
| "\\\\" { string (s ^ "\\") lexbuf }
|
||
|
| '"' { STRING s }
|
||
|
| eof { raise (Unexpected_character "missing string terminator") }
|
||
|
| _ as c { string (s ^ (String.make 1 c)) lexbuf }
|
||
|
|
||
|
|
||
|
and comment n = parse
|
||
|
| "*)" { if n = 1 then token lexbuf else comment (n - 1) lexbuf }
|
||
|
| "(*" { comment (n + 1) lexbuf }
|
||
|
| '"' ( [^ '"' '\\'] | ( '\\' [^ '"'] ) ) '"' { comment n lexbuf }
|
||
|
| eof { raise (Unexpected_character "missing comment terminator") }
|
||
|
| ('\r'? '\n' '\r'?) { Lexing.new_line lexbuf; comment n lexbuf }
|
||
|
| _ { comment n lexbuf }
|
||
|
|
||
|
|post}
|
||
|
let tokens = fun ppf tokens ->
|
||
|
let open Simple_utils.PP_helpers in
|
||
|
fprintf ppf "%s%a\n%s" pre (list_sep token (const "\n")) tokens post
|
||
|
end
|
||
|
|
||
|
module Print_ml = struct
|
||
|
open Format
|
||
|
|
||
|
let token = fun ppf {name} ->
|
||
|
fprintf ppf " | %s -> \"%s\"" name name
|
||
|
|
||
|
let pre =
|
||
|
{pre|include Token_type
|
||
|
|
||
|
let to_string : token -> string = function
|
||
|
| STRING _ -> "STRING"
|
||
|
| NAME _ -> "NAME s"
|
||
|
| CONSTRUCTOR_NAME _ -> "CONSTRUCTOR_NAME s"
|
||
|
| INT _ -> "INT n"
|
||
|
| NAT _ -> "NAT n"
|
||
|
| TZ _ -> "TZ n"
|
||
|
| EOF -> "EOF"
|
||
|
|pre}
|
||
|
|
||
|
let tokens = fun ppf tokens ->
|
||
|
let open Simple_utils.PP_helpers in
|
||
|
fprintf ppf "%s%a" pre (list_sep token (const "\n")) tokens
|
||
|
end
|
||
|
|
||
|
let tokens = [
|
||
|
keyword "let%init" ;
|
||
|
keyword "let%entry" ;
|
||
|
keyword "let" ;
|
||
|
keyword "type" ;
|
||
|
keyword "in" ;
|
||
|
keyword "if" ;
|
||
|
keyword "then" ;
|
||
|
keyword "else" ;
|
||
|
(* keyword "block" ;
|
||
|
* keyword "for" ;
|
||
|
* keyword "const" ; *)
|
||
|
keyword "fun" ;
|
||
|
keyword "match" ;
|
||
|
keyword "with" ;
|
||
|
symbol "()" "UNIT" ;
|
||
|
symbol "+" "PLUS" ;
|
||
|
symbol "~" "TILDE" ;
|
||
|
symbol "->" "ARROW" ;
|
||
|
symbol "<-" "LEFT_ARROW" ;
|
||
|
symbol "<=" "LE" ;
|
||
|
symbol "<>" "UNEQUAL" ;
|
||
|
symbol "<" "LT" ;
|
||
|
symbol ">" "GT" ;
|
||
|
symbol "-" "MINUS" ;
|
||
|
symbol "*" "TIMES" ;
|
||
|
symbol "/" "DIV" ;
|
||
|
symbol "=" "EQUAL" ;
|
||
|
symbol "|" "VBAR" ;
|
||
|
symbol "[" "LSQUARE" ;
|
||
|
symbol "]" "RSQUARE" ;
|
||
|
symbol "(" "LPAREN" ;
|
||
|
symbol ")" "RPAREN" ;
|
||
|
symbol "{" "LBRACKET" ;
|
||
|
symbol "}" "RBRACKET" ;
|
||
|
symbol ";;" "DOUBLE_SEMICOLON" ;
|
||
|
symbol ";" "SEMICOLON" ;
|
||
|
symbol "::" "DOUBLE_COLON" ;
|
||
|
symbol ":" "COLON" ;
|
||
|
symbol "," "COMMA" ;
|
||
|
symbol "." "DOT" ;
|
||
|
]
|
||
|
|
||
|
let () =
|
||
|
let argn = Array.length Sys.argv in
|
||
|
if argn = 1 then exit 1 ;
|
||
|
let arg = Sys.argv.(1) in
|
||
|
let open Simple_utils.PP_helpers in
|
||
|
match arg with
|
||
|
| "mll" -> (
|
||
|
Format.printf "%a@.%a\n" comment "Generated .mll" Print_mll.tokens tokens
|
||
|
)
|
||
|
| "mly" -> (
|
||
|
Format.printf "%a@.%a\n" comment "Generated .mly" Print_mly.tokens tokens
|
||
|
)
|
||
|
| "ml" -> (
|
||
|
Format.printf "%a@.%a\n" comment "Generated .ml" Print_ml.tokens tokens
|
||
|
)
|
||
|
| _ -> exit 1
|
||
|
|