ligo/vendors/Preproc/E_Lexer.mll

106 lines
2.4 KiB
OCaml
Raw Normal View History

2020-03-23 22:43:06 +04:00
(* Auxiliary scanner for boolean expressions of the C# preprocessor *)
{
2020-03-24 20:47:24 +04:00
(* START OF HEADER *)
2020-03-23 22:43:06 +04:00
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
let sprintf = Printf.sprintf
open E_Parser
(* Concrete syntax of tokens. See module [E_Parser].

   Each token is mapped to the text displayed for it: the boolean
   literals and the operators give their spelling, an identifier
   gives its own name, and the end-of-line token gives the symbolic
   name "EOL". *)

let string_of_token token =
  match token with
  | True       -> "true"
  | False      -> "false"
  | Ident name -> name
  | OR         -> "||"
  | AND        -> "&&"
  | EQ         -> "=="
  | NEQ        -> "!="
  | NOT        -> "!"
  | LPAR       -> "("
  | RPAR       -> ")"
  | EOL        -> "EOL"
(* Errors *)

(* Lexical errors. [Invalid_character c] is what [scan] reports, through
   [fail], for a character [c] matched by none of its other clauses. *)
type error = Invalid_character of char
2020-03-23 22:43:06 +04:00
(* Renders an error as a one-line message showing both the offending
   character and its numeric code. *)

let error_to_string err =
  match err with
  | Invalid_character ch ->
      let code = Char.code ch in
      sprintf "Invalid character '%c' (%d)." ch code
2020-03-23 22:43:06 +04:00
(* Builds the full message for a located error.

   The argument record pairs the error ([value]) with its [region].
   The optional [offsets] (default [true]) and the labelled [file] are
   handed unchanged to [region#to_string], together with [`Byte].
   The result pairs the same region with the text
   "Preprocessing error <region>:\n<message>". *)

let format ?(offsets=true) Region.{region; value = err} ~file =
  let location = region#to_string ~file ~offsets `Byte in
  let message  = error_to_string err in
  let value    = sprintf "Preprocessing error %s:\n%s" location message in
  Region.{value; region}
2020-03-23 22:43:06 +04:00
(* Exception carrying an [error] together with the region attached to
   it; raised by [stop]. *)
exception Error of error Region.reg
2020-03-23 22:43:06 +04:00
(* Builds the region spanned by the lexeme most recently matched in
   [buffer], from the lexeme's start and end positions. *)

let mk_reg buffer =
  let lexeme_start = Pos.from_byte (Lexing.lexeme_start_p buffer) in
  let lexeme_end   = Pos.from_byte (Lexing.lexeme_end_p buffer) in
  Region.make ~start:lexeme_start ~stop:lexeme_end
(* Raises [Error] with [value] attached to [region]; never returns
   normally. *)
let stop value region =
  let located = Region.{region; value} in
  raise (Error located)
(* Raises [Error] for [error], located at the region that [mk_reg]
   computes from [buffer]. *)
let fail error buffer =
  let region = mk_reg buffer in
  stop error region
2020-03-24 20:47:24 +04:00
(* END OF HEADER *)
2020-03-23 22:43:06 +04:00
}
(* Regular expressions for literals *)

(* White space: a line break is LF, CR or the CR LF pair; a blank is a
   space or a horizontal tab. *)

let newline = "\r\n" | '\r' | '\n'
let blank   = [' ' '\t']

(* Unicode escape sequences: "\u" followed by four hexadecimal digits,
   or "\U" followed by eight. *)

let digit    = ['0'-'9']
let hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
let four_hex = hexdigit hexdigit hexdigit hexdigit
let uni_esc  = "\\U" four_hex four_hex | "\\u" four_hex

(* Identifiers: a letter (possibly a Unicode escape) or an underscore,
   followed by any number of letters, digits and underscores. *)

let lowercase = ['a'-'z']
let uppercase = ['A'-'Z']
let letter    = uni_esc | lowercase | uppercase
let start     = letter | '_'
let alphanum  = start | digit
let ident     = start alphanum*
(* Rules *)
(* Entry point: returns the next token of a boolean expression.

   Blanks are skipped; a line break and the end of input are both
   reported as [EOL]; a "//" comment is handed over to [inline_com].
   When several clauses match the same longest lexeme, the earliest
   clause is selected: hence the literals "true" and "false" come
   before [ident], and the catch-all clause comes last. *)

rule scan = parse
  eof           { EOL }
| newline       { Lexing.new_line lexbuf; EOL }
| blank+        { scan lexbuf }
| "//"          { inline_com lexbuf }
| '('           { LPAR }
| ')'           { RPAR }
| "!="          { NEQ }
| "=="          { EQ }
| "&&"          { AND }
| "||"          { OR }
| '!'           { NOT }
| "true"        { True }
| "false"       { False }
| ident as name { Ident name }
| _ as ch       { fail (Invalid_character ch) lexbuf }
2020-03-23 22:43:06 +04:00
(* Skips the remainder of a "//" comment: characters are discarded one
   at a time until a line break or the end of input, either of which
   yields [EOL]. *)

and inline_com = parse
  eof     { EOL }
| newline { Lexing.new_line lexbuf; EOL }
| _       { inline_com lexbuf }
{
2020-03-24 20:47:24 +04:00
(* START OF TRAILER *)
(* END OF TRAILER *)
2020-03-23 22:43:06 +04:00
}