ligo/src/client/embedded/bootstrap/concrete_lexer.mll


{

open Concrete_parser

open Script_located_ir

(** [count_nl s] returns how many line-feed characters (code 10) occur
    in [s]. *)
let count_nl s =
  let total = ref 0 in
  String.iter
    (fun ch -> if Compare.Char.(ch = '\010') then incr total)
    s;
  !total

(** [update_loc lexbuf nl indent] advances the current position of [lexbuf]
    by [nl] lines and places the beginning-of-line marker [indent]
    characters before the current character offset. *)
let update_loc lexbuf nl indent =
  let current = lexbuf.Lexing.lex_curr_p in
  let line = current.Lexing.pos_lnum + nl in
  let bol = current.Lexing.pos_cnum - indent in
  lexbuf.Lexing.lex_curr_p <-
    { current with Lexing.pos_lnum = line; pos_bol = bol }

(** Same as [update_loc], but leaves [lexbuf] untouched when [nl] is zero. *)
let may_update_loc lexbuf nl indent =
  if Compare.Int.(nl = 0) then () else update_loc lexbuf nl indent

(** Column of the start of the current lexeme, i.e. its character offset
    relative to the beginning of its line. *)
let start_offset lexbuf =
  let { Lexing.pos_cnum; pos_bol; _ } = lexbuf.Lexing.lex_start_p in
  pos_cnum - pos_bol

(** Column of the current position of [lexbuf] (the end of the current
    lexeme), relative to the beginning of its line. *)
let end_offset lexbuf =
  let { Lexing.pos_cnum; pos_bol; _ } = lexbuf.Lexing.lex_curr_p in
  pos_cnum - pos_bol

(** The (start, end) positions of the lexeme currently held by [lexbuf]. *)
let curr_location lexbuf =
  let open Lexing in
  (lexbuf.lex_start_p, lexbuf.lex_curr_p)

(** Converts a lexer position into a [(line, column)] pair, the column
    being the character offset from the beginning of the line. *)
let pos p =
  let { Lexing.pos_lnum; pos_cnum; pos_bol; _ } = p in
  (pos_lnum, pos_cnum - pos_bol)

(** Converts a (start, stop) pair of lexer positions into a pair of
    [(line, column)] pairs, using [pos] on each end. *)
let pos2 (start, stop) =
  let first = pos start in
  let last = pos stop in
  (first, last)

(* To translate escape sequences *)

(** Maps the character following a backslash to the character it denotes:
    [n], [r], [b] and [t] become line feed, carriage return, backspace and
    tab; any other character stands for itself. *)
let char_for_backslash escaped =
  match escaped with
  | 'n' -> '\n'
  | 'r' -> '\r'
  | 'b' -> '\b'
  | 't' -> '\t'
  | literal -> literal

(** [char_for_decimal_code lexbuf i] decodes the three decimal digits found
    at offsets [i], [i+1] and [i+2] of the current lexeme into the character
    with that code.
    @raise Illegal_escape if the decoded value is outside [0..255]. *)
let char_for_decimal_code lexbuf i =
  let digit k = int_of_char (Lexing.lexeme_char lexbuf (i + k)) - 48 in
  let code = (100 * digit 0) + (10 * digit 1) + digit 2 in
  if Compare.Int.(code >= 0 && code <= 255) then
    char_of_int code
  else
    raise (Illegal_escape (pos2 (curr_location lexbuf), Lexing.lexeme lexbuf))

(** [char_for_hexadecimal_code lexbuf i] decodes the two hexadecimal digits
    found at offsets [i] and [i+1] of the current lexeme into the character
    with that code. Both lower-case and upper-case digits are accepted. *)
let char_for_hexadecimal_code lexbuf i =
  (* Value of the hex digit located [offset] characters after [i]. *)
  let nibble offset =
    let code = int_of_char (Lexing.lexeme_char lexbuf (i + offset)) in
    if Compare.Int.(code >= 97) then code - 87      (* 'a'..'f' *)
    else if Compare.Int.(code >= 65) then code - 55 (* 'A'..'F' *)
    else code - 48                                  (* '0'..'9' *)
  in
  let high = nibble 0 in
  let low = nibble 1 in
  char_of_int ((high * 16) + low)

(** Lexer state *)

type state = {
  (* Stack of open layout levels, innermost first. Each entry pairs a column
     with either an indentation level or an opened block; an opened block
     records its opener character and the location of that opener. *)
  mutable indent_stack:
    (int * [`Indent | `Open of (char * (Lexing.position * Lexing.position)) ]) list;
  (* Tokens already produced but not yet handed out by [token]. *)
  mutable buffer: Concrete_parser.token list;
  (* Storage for the characters of the string literal being read. *)
  mutable string_buff: bytes;
  (* Index in [string_buff] where the next character will be written. *)
  mutable string_index: int;
  (* Location of the opening quote of the string literal being read. *)
  mutable string_start_loc: Lexing.position * Lexing.position;
  (* Locations of the openers of the block comments being read, innermost
     first. *)
  mutable comment_start_loc: (Lexing.position * Lexing.position) list;
}

(** Creates a fresh lexer state: empty indentation stack, no pending token,
    an empty 256-byte string buffer, a dummy string location and no open
    comment. *)
let init_state () =
  let no_location = (Lexing.dummy_pos, Lexing.dummy_pos) in
  { indent_stack = [];
    buffer = [];
    string_buff = Bytes.create 256;
    string_index = 0;
    string_start_loc = no_location;
    comment_start_loc = [] }


(** String helpers *)

(** Discards any stored string contents: rewinds the write index to zero and
    installs a fresh 256-byte buffer. *)
let reset_string_buffer st =
  st.string_index <- 0;
  st.string_buff <- Bytes.create 256

(** [store_string_char st c] appends [c] to the string buffer of [st],
    enlarging the buffer first when the write index does not fit in it. *)
let store_string_char st c =
  let capacity = Bytes.length st.string_buff in
  if st.string_index >= capacity then begin
    (* The write index can be arbitrarily far past the capacity when the
       buffer was replaced by a smaller one while the index was kept, so a
       single doubling is not always enough: keep doubling until the index
       fits. When the index equals the capacity this yields exactly twice
       the capacity. *)
    let rec fitting_size size =
      if size > st.string_index then size else fitting_size (size * 2) in
    let new_buff = Bytes.create (fitting_size (max 1 capacity * 2)) in
    Bytes.blit st.string_buff 0 new_buff 0 capacity;
    st.string_buff <- new_buff
  end;
  Bytes.set st.string_buff st.string_index c;
  st.string_index <- st.string_index + 1

(** Appends every character of [s], in order, to the string buffer of
    [st]. *)
let store_string st s =
  String.iter (fun ch -> store_string_char st ch) s

(** Appends the text of the current lexeme of [lexbuf] to the string buffer
    of [st]. *)
let store_lexeme st lexbuf =
  let text = Lexing.lexeme lexbuf in
  store_string st text

(** Returns the characters stored so far as a string and leaves [st] with an
    empty 256-byte buffer. *)
let get_stored_string st =
  let contents = Bytes.sub_string st.string_buff 0 st.string_index in
  st.string_buff <- Bytes.create 256;
  (* The write index must be rewound together with the buffer: leaving it
     at its old value would make a later store write past the end of the
     fresh, smaller buffer. *)
  st.string_index <- 0;
  contents


(** Indentation helpers *)

(** [first_token st] is [true] exactly when the indentation stack of [st]
    is empty. *)
let first_token { indent_stack; _ } =
  match indent_stack with
  | _ :: _ -> false
  | [] -> true

(** Column at which a location begins: the character offset of its start
    position relative to the beginning of the line. The end position is
    ignored. *)
let starting_offset loc =
  let { Lexing.pos_cnum; pos_bol; _ } = fst loc in
  pos_cnum - pos_bol

(** [pop_indent st loc xs i] unwinds the layout entries [xs] until one
    whose column equals [i] is found, producing one [DEDENT] per
    indentation level dropped followed by [NEWLINE]. On success the
    indentation stack of [st] is set to the entries starting at the
    matching one. [loc] is the location of the token that triggered the
    unwinding and is only used for error reporting.

    @raise Invalid_indentation if column [i] matches no enclosing level,
      including when every level has been popped.
    @raise Invalid_indentation_in_block if an opened block is reached and
      [i] is to the right of its opener.
    @raise Unclosed if an opened block is reached and [i] is not to the
      right of its opener. *)
let rec pop_indent st loc xs i =
  match xs with
  | [] ->
      (* Reachable from user input: the token is indented less than the
         outermost level (e.g. an indented first line followed by a line
         starting further left). Report it as an indentation error rather
         than failing an assertion. *)
      raise @@ Invalid_indentation (pos2 loc)
  | ((x, _) :: _) as xs when Compare.Int.(x = i) ->
      st.indent_stack <- xs;
      [NEWLINE]
  | (x, `Indent) :: xs ->
      if Compare.Int.(x > i) then
        DEDENT :: pop_indent st loc xs i
      else
        (* [i] falls strictly between two recorded levels. *)
        raise @@ Invalid_indentation (pos2 loc)
  | (_, `Open (c, opener_loc)) :: _ ->
      let opener_offset = starting_offset opener_loc in
      if Compare.Int.(i > opener_offset) then
        raise @@ Invalid_indentation_in_block (pos2 loc, c, pos2 opener_loc)
      else
        raise @@ Unclosed (pos2 loc, c, pos2 opener_loc)

(** [indent_token st loc] computes the layout tokens to emit for a token
    located at [loc] that begins a line, by comparing its column with the
    top of the indentation stack of [st]:
    - deeper than the top, or empty stack: the column is pushed as a new
      indentation level and [[INDENT]] is returned;
    - same column as the top: [[NEWLINE]];
    - shallower than a top indentation level: a [DEDENT] followed by the
      result of [pop_indent] on the rest of the stack;
    - shallower than the top when the top is an opened block:
      [Invalid_indentation_in_block] is raised if the column is to the
      right of the opener, [Unclosed] otherwise. *)
let indent_token st loc =
  let col = starting_offset loc in
  let push_level () =
    st.indent_stack <- (col, `Indent) :: st.indent_stack;
    [INDENT] in
  match st.indent_stack with
  | [] -> push_level ()
  | (top, kind) :: below ->
      if Compare.Int.(top > col) then begin
        match kind with
        | `Indent ->
            DEDENT :: pop_indent st loc below col
        | `Open (c, opener_loc) ->
            let opener_col = starting_offset opener_loc in
            if Compare.Int.(col > opener_col) then
              raise (Invalid_indentation_in_block
                       (pos2 loc, c, pos2 opener_loc))
            else
              raise (Unclosed (pos2 loc, c, pos2 opener_loc))
      end
      else if Compare.Int.(top = col) then
        [NEWLINE]
      else
        push_level ()

(** [open_block st opener opener_loc token_offset] records a block opened by
    the character [opener] found at [opener_loc], whose first inner token
    sits at column [token_offset], and returns the matching opening token
    ([LBRACE] or [LPAREN]).
    @raise Invalid_indentation_after_opener if [token_offset] is not strictly
      to the right of the opener column. *)
let open_block st opener opener_loc token_offset =
  let opener_col = starting_offset opener_loc in
  if Compare.Int.(token_offset <= opener_col) then
    raise (Invalid_indentation_after_opener (pos2 opener_loc, opener)) ;
  let entry = (token_offset, `Open (opener, opener_loc)) in
  st.indent_stack <- entry :: st.indent_stack;
  match opener with
  | '{' -> [LBRACE]
  | '(' -> [LPAREN]
  |  _  -> assert false (* the lexer only passes the two openers above *)

(** [close_block st bol closer closer_loc] closes the innermost opened block
    with the character [closer] found at [closer_loc]. One [DEDENT] is
    produced for every indentation level sitting above that block, followed
    by the closing token ([RBRACE] or [RPAREN]); the block and everything
    above it are removed from the indentation stack. [bol] tells whether the
    closer begins its line, in which case it must be in the same column as
    its opener.

    @raise Unopened if no opened block is on the stack.
    @raise Unaligned_closer if [bol] holds and the columns differ.
    @raise Unclosed if [closer] does not match the opener of the block. *)
let close_block st bol closer closer_loc =
  let closer_col = starting_offset closer_loc in
  let rec unwind = function
    | [] ->
        raise (Unopened (pos2 closer_loc, closer))
    | (_, `Indent) :: rest ->
        DEDENT :: unwind rest
    | (_, `Open (opener, opener_loc)) :: rest ->
        let opener_col = starting_offset opener_loc in
        let aligned = Compare.Int.(opener_col = closer_col) in
        if bol && not aligned then
          raise (Unaligned_closer
                   (pos2 closer_loc, opener, closer, pos2 opener_loc)) ;
        (* The stack is updated before the opener/closer pair is checked. *)
        st.indent_stack <- rest;
        let closing_token =
          match opener, closer with
          | '{', '}' -> RBRACE
          | '(', ')' -> RPAREN
          |  _       ->
              raise (Unclosed (pos2 closer_loc, opener, pos2 opener_loc)) in
        [closing_token]
  in
  unwind st.indent_stack

}

(* An end-of-line comment: a hash sign and everything up to, but not
   including, the next line feed. *)
let eol_comment = '#' [^ '\010'] *
(* A line terminator (LF or CRLF), optionally preceded by an end-of-line
   comment. Note that the comment is part of the match. *)
let newline = eol_comment ? ('\010' | "\013\010" )
(* Only the space character counts as spacing; tabs are not included. *)
let space = [' ']
(* Identifiers start with a letter or underscore and continue with letters,
   underscores, single quotes or digits. *)
let firstidentchar = ['A'-'Z' 'a'-'z' '_']
let identchar = ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']
(* Integer literals in base 10, 16, 8 and 2. Underscores are allowed after
   the first digit. *)
let decimal_literal =
  ['0'-'9'] ['0'-'9' '_']*
let hex_literal =
  '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*
let oct_literal =
  '0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
let bin_literal =
  '0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
(* Any of the above, with an optional leading minus sign. *)
let int_literal =
  '-' ? ( decimal_literal | hex_literal | oct_literal | bin_literal)

(* Layout lexer. Skips spaces, line terminators and block comments, counting
   in [nl] the line terminators crossed, then returns the list of layout
   tokens (INDENT, NEWLINE, DEDENT, braces, parentheses, EOF) that precede
   the next raw token. The list is empty when the next token needs no
   layout token. *)
rule indent_tokens st nl = parse

| space   { indent_tokens st nl lexbuf }
| newline { Lexing.new_line lexbuf; indent_tokens st (nl + 1) lexbuf }

(* Empty match: selected only when no other case applies, i.e. when the next
   character starts a raw token. *)
| ""
    { if Compare.Int.(nl <> 0) || first_token st
      then indent_token st (curr_location lexbuf)
      else [] }

| "/*"
    { st.comment_start_loc <- [curr_location lexbuf];
      comment st nl lexbuf }

| ('{' | '(' as opener)
    { let opener_loc = curr_location lexbuf in
      (* Look ahead for the column of the first token inside the block. *)
      let token_offset = next_token_indent st lexbuf in
      let layout =
        if Compare.Int.(nl <> 0) || first_token st
        then indent_token st opener_loc
        else [] in
      layout @ open_block st opener opener_loc token_offset }

| ('}' | ')' as closer)
    { let closer_loc = curr_location lexbuf in
      close_block st Compare.Int.(nl <> 0) closer closer_loc }

| eof
    { (* Close every pending indentation level; a block still open at the
         end of input is an error. *)
      let dedent_or_fail = function
        | (_, `Indent) -> DEDENT
        | (_, `Open (c, loc)) ->
            raise (Unclosed (pos2 (curr_location lexbuf), c, pos2 loc)) in
      List.map dedent_or_fail st.indent_stack @ [EOF]
    }

(* Block-comment lexer. Comments nest; [st.comment_start_loc] holds the
   locations of the currently open comment openers, innermost first. [nl]
   counts the line terminators crossed so far and is handed back to
   [indent_tokens] once the outermost comment is closed. String literals
   inside comments are lexed with the [string] rule.
   NOTE(review): [newline] also matches an end-of-line comment, so a hash
   sign inside a block comment hides the rest of its line, including a
   comment closer. Confirm that this is intended. *)
and comment st nl = parse

| "/*"    { st.comment_start_loc <-
              curr_location lexbuf :: st.comment_start_loc;
            comment st nl lexbuf }

| "*/"    { match st.comment_start_loc with
            | [] -> assert false
            | [_] ->
                (* Leaving the outermost comment: forget its location so
                   that the [string] rule, which inspects this list to tell
                   whether it is inside a comment, rejects raw line
                   terminators in ordinary string literals again. *)
                st.comment_start_loc <- [];
                indent_tokens st nl lexbuf
            | _ :: xs -> st.comment_start_loc <- xs; comment st nl lexbuf }

| "\""    { st.string_start_loc <- curr_location lexbuf;
            match string st nl lexbuf with
            | nl -> comment st nl lexbuf
            | exception Unterminated_string str_start ->
                (* Report the innermost and the outermost comment openers
                   together with the start of the unterminated string. *)
                match st.comment_start_loc with
                | [] -> assert false
                | loc :: _ ->
                    let start = List.hd (List.rev st.comment_start_loc) in
                    raise @@
                    Unterminated_string_in_comment
                      (pos2 loc, pos2 start, str_start) }

| newline { Lexing.new_line lexbuf; comment st (nl+1) lexbuf }

| eof     { match st.comment_start_loc with
            | [] -> assert false
            | loc :: _ ->
                let start = List.hd (List.rev st.comment_start_loc) in
                raise @@ Unterminated_comment (pos2 loc, pos2 start) }

| _       { comment st nl lexbuf }


(** Eat spacings and return the next token offset.
    Spaces and line terminators are skipped (each line terminator is
    recorded with [Lexing.new_line]); the result is the column of the
    current position once neither can be matched. Block comments are not
    skipped by this rule. *)
and next_token_indent st = parse

| space   { next_token_indent st lexbuf }

| newline { Lexing.new_line lexbuf; next_token_indent st lexbuf }

(* Empty match: reached only when neither of the cases above applies. *)
| ""      { end_offset lexbuf }

(** The lexer for non-indentation tokens.
    It should not care about 'space', 'newline', '{}()' nor comments.
    Produces SEMICOLON, PRIM, INT and STRING tokens and raises
    [Illegal_character] on anything else. *)
and raw_token st = parse

| ";" { SEMICOLON }

| firstidentchar identchar *
    { PRIM (Lexing.lexeme lexbuf) }

| int_literal
    { INT (Lexing.lexeme lexbuf) }

| "\""
    { let literal_start = lexbuf.Lexing.lex_start_p in
      reset_string_buffer st;
      st.string_start_loc <- curr_location lexbuf;
      (* The line count returned by the string rule is not needed here. *)
      let (_ : int) = string st 0 lexbuf in
      (* Make the token start at the opening quote rather than at the last
         fragment matched by the string rule. *)
      lexbuf.Lexing.lex_start_p <- literal_start;
      STRING (get_stored_string st) }

| _
    { let offending = Lexing.lexeme_char lexbuf 0 in
      raise (Illegal_character (pos2 (curr_location lexbuf), offending))
    }

(* String-literal lexer, entered just after the opening quote. Characters
   are accumulated in the string buffer of [st]; the result is [nl] plus the
   number of raw line terminators crossed, which can only be non-zero inside
   a comment.

   Line terminators are matched here as bare LF or CRLF rather than with the
   [newline] regexp: [newline] also matches an end-of-line comment, so by
   longest match a hash sign inside a string would swallow the rest of the
   line, closing quote included.

   Raises Illegal_escape on a decimal escape above 255, Newline_in_string on
   a raw line terminator outside of comments, and Unterminated_string at end
   of input. *)
and string st nl = parse
    '"'
      { nl }
  (* Backslash at end of line: skip the line terminator and the blanks that
     start the next line. *)
  | '\\' ('\010' | "\013\010") ([' ' '\t'] * as space)
      { update_loc lexbuf 1 (String.length space);
        string st nl lexbuf
      }
  | '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
      { store_string_char st (char_for_backslash(Lexing.lexeme_char lexbuf 1));
        string st nl lexbuf }
  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
      { store_string_char st (char_for_decimal_code lexbuf 1);
        string st nl lexbuf }
  | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
      { store_string_char st (char_for_hexadecimal_code lexbuf 2);
        string st nl lexbuf }
  (* Raw line terminator: tolerated (and not stored) only while a comment is
     open. *)
  | ('\010' | "\013\010")
      { match st.comment_start_loc with
        | [] -> raise @@ Newline_in_string (pos2 (curr_location lexbuf))
        | _  -> Lexing.new_line lexbuf; string st (nl+1) lexbuf }
  | eof
      { raise @@ Unterminated_string (pos2 st.string_start_loc) }
  | _
      { store_string_char st (Lexing.lexeme_char lexbuf 0);
        string st nl lexbuf }


{

  (** Main entry point: returns the next token. Tokens buffered in [st] are
      handed out first; otherwise the layout lexer is run, and either its
      tokens are buffered and served, or, when it produces none, the next
      raw token is read. *)
  let rec token st lexbuf =
    match st.buffer with
    | [] ->
        begin match indent_tokens st 0 lexbuf with
          | [] -> raw_token st lexbuf
          | pending -> st.buffer <- pending; token st lexbuf
        end
    | next :: rest ->
        st.buffer <- rest;
        next

}
First public release 2016-09-08 21:13:10 +04:00
			`{`

			`open Concrete_parser`

			`open Script_located_ir`

			`let count_nl s =`
			`let c = ref 0 in`
			`for i = 0 to String.length s - 1 do`
			`if Compare.Char.(s.[i] = '\010') then`
			`incr c`
			`done;`
			`!c`

			`let update_loc lexbuf nl indent =`
			`let open Lexing in`
			`let lcp = lexbuf.lex_curr_p in`
			`lexbuf.lex_curr_p <- { lcp with`
			`pos_lnum = lcp.pos_lnum + nl;`
			`pos_bol = lcp.pos_cnum - indent;`
			`}`

			`let may_update_loc lexbuf nl indent =`
			`if Compare.Int.(nl <> 0) then update_loc lexbuf nl indent`

			`let start_offset lexbuf =`
			`let open Lexing in`
			`let lsp = lexbuf.lex_start_p in`
			`lsp.pos_cnum - lsp.pos_bol`

			`let end_offset lexbuf =`
			`let open Lexing in`
			`let lcp = lexbuf.lex_curr_p in`
			`lcp.pos_cnum - lcp.pos_bol`

			`let curr_location lexbuf =`
			`lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p`

			`let pos pos =`
			`Lexing.(pos.pos_lnum, pos.pos_cnum - pos.pos_bol)`

			`let pos2 (start, stop) =`
			`pos start, pos stop`

			`(* To translate escape sequences *)`

			`let char_for_backslash = function`
			`\| 'n' -> '\010'`
			`\| 'r' -> '\013'`
			`\| 'b' -> '\008'`
			`\| 't' -> '\009'`
			`\| c -> c`

			`let char_for_decimal_code lexbuf i =`
			`let c = 100 * (int_of_char(Lexing.lexeme_char lexbuf i) - 48) +`
			`10 * (int_of_char(Lexing.lexeme_char lexbuf (i+1)) - 48) +`
			`(int_of_char(Lexing.lexeme_char lexbuf (i+2)) - 48) in`
			`if Compare.Int.(c < 0 \|\| c > 255) then`
			`raise @@ Illegal_escape (pos2 (curr_location lexbuf), Lexing.lexeme lexbuf)`
			`else char_of_int c`

			`let char_for_hexadecimal_code lexbuf i =`
			`let d1 = int_of_char (Lexing.lexeme_char lexbuf i) in`
			`let val1 = if Compare.Int.(d1 >= 97) then d1 - 87`
			`else if Compare.Int.(d1 >= 65) then d1 - 55`
			`else d1 - 48`
			`in`
			`let d2 = int_of_char (Lexing.lexeme_char lexbuf (i+1)) in`
			`let val2 = if Compare.Int.(d2 >= 97) then d2 - 87`
			`else if Compare.Int.(d2 >= 65) then d2 - 55`
			`else d2 - 48`
			`in`
			`char_of_int (val1 * 16 + val2)`

			`(** Lexer state *)`

			`type state = {`
			`mutable indent_stack:`
			(int * [`Indent \| `Open of (char * (Lexing.position * Lexing.position)) ]) list;
			`mutable buffer: Concrete_parser.token list;`
			`mutable string_buff: bytes;`
			`mutable string_index: int;`
			`mutable string_start_loc: Lexing.position * Lexing.position;`
			`mutable comment_start_loc: (Lexing.position * Lexing.position) list;`
			`}`

			`let init_state () = {`
			`indent_stack = [];`
			`buffer = [];`
			`string_index = 0;`
			`string_buff = Bytes.create 256;`
			`string_start_loc = Lexing.dummy_pos, Lexing.dummy_pos;`
			`comment_start_loc = [];`
			`}`


			`(** String helpers *)`

			`let reset_string_buffer st =`
			`st.string_buff <- Bytes.create 256;`
			`st.string_index <- 0`

			`let store_string_char st c =`
			`if st.string_index >= Bytes.length st.string_buff then begin`
			`let new_buff = Bytes.create (Bytes.length (st.string_buff) * 2) in`
			`Bytes.blit st.string_buff 0 new_buff 0 (Bytes.length st.string_buff);`
			`st.string_buff <- new_buff`
			`end;`
			`Bytes.set st.string_buff st.string_index c;`
			`st.string_index <- st.string_index + 1`

			`let store_string st s =`
			`for i = 0 to String.length s - 1 do`
			`store_string_char st s.[i];`
			`done`

			`let store_lexeme st lexbuf =`
			`store_string st (Lexing.lexeme lexbuf)`

			`let get_stored_string st =`
			`let s = Bytes.sub st.string_buff 0 st.string_index in`
			`st.string_buff <- Bytes.create 256;`
			`Bytes.to_string s`


			`(** Indentation helpers *)`

			`let first_token st =`
			`match st.indent_stack with`
			`\| [] -> true`
			`\| _ :: _ -> false`

			`let starting_offset (start, _) =`
			`let open Lexing in`
			`start.pos_cnum - start.pos_bol`

			`let rec pop_indent st loc xs i =`
			`match xs with`
			`\| [] -> assert false`
			`\| ((x, _) :: _) as xs when Compare.Int.(x = i) ->`
			`st.indent_stack <- xs;`
			`[NEWLINE]`
			\| (x, `Indent) :: xs ->
			`if Compare.Int.(x > i) then`
			`DEDENT :: pop_indent st loc xs i`
			`else`
			`raise @@ Invalid_indentation (pos2 loc)`
			\| (_, `Open (c, opener_loc)) :: _ ->
			`let opener_offset = starting_offset opener_loc in`
			`if Compare.Int.(i > opener_offset) then`
			`raise @@ Invalid_indentation_in_block (pos2 loc, c, pos2 opener_loc)`
			`else`
			`raise @@ Unclosed (pos2 loc, c, pos2 opener_loc)`

			`let indent_token st loc =`
			`let i = starting_offset loc in`
			`match st.indent_stack with`
			\| (x, `Indent) :: xs when Compare.Int.(x > i) ->
			`DEDENT :: pop_indent st loc xs i;`
			\| (x, `Open (c, opener_loc)) :: _ when Compare.Int.(x > i) ->
			`let opener_offset = starting_offset opener_loc in`
			`if Compare.Int.(i > opener_offset) then`
			`raise @@ Invalid_indentation_in_block (pos2 loc, c, pos2 opener_loc)`
			`else`
			`raise @@ Unclosed (pos2 loc, c, pos2 opener_loc)`
			`\| (x, _) :: _ when Compare.Int.(x = i) ->`
			`[NEWLINE]`
			`\| [] \| (_, _) :: _ (* when Compare.Int.(x < i) *) ->`
			st.indent_stack <- (i, `Indent) :: st.indent_stack;
			`[INDENT]`

			`let open_block st opener opener_loc token_offset =`
			`let opener_offset = starting_offset opener_loc in`
			`if Compare.Int.(token_offset <= opener_offset) then`
			`raise @@ Invalid_indentation_after_opener (pos2 opener_loc, opener) ;`
			`st.indent_stack <-`
			(token_offset, `Open (opener, opener_loc)) :: st.indent_stack;
			`match opener with`
			`\| '{' -> [LBRACE]`
			`\| '(' -> [LPAREN]`
			`\| _ -> assert false`

			`let close_block st bol closer closer_loc =`
			`let closer_offset = starting_offset closer_loc in`
			`let rec pop xs =`
			`match xs with`
			`\| [] -> raise @@ Unopened (pos2 closer_loc, closer)`
			\| (_, `Indent) :: xs -> DEDENT :: pop xs
			\| (_, `Open (opener, opener_loc)) :: xs ->
			`let opener_offset = starting_offset opener_loc in`
			`if bol && Compare.Int.(opener_offset <> closer_offset) then`
			`raise @@`
			`Unaligned_closer (pos2 closer_loc, opener, closer, pos2 opener_loc) ;`
			`st.indent_stack <- xs;`
			`[ match opener, closer with`
			`\| '{', '}' -> RBRACE`
			`\| '(', ')' -> RPAREN`
			`\| _ ->`
			`raise @@ Unclosed (pos2 closer_loc, opener, pos2 opener_loc) ]`
			`in`
			`pop st.indent_stack`

			`}`

			`let eol_comment = '#' [^ '\010'] *`
			`let newline = eol_comment ? ('\010' \| "\013\010" )`
			`let space = [' ']`
			`let firstidentchar = ['A'-'Z' 'a'-'z' '_']`
			`let identchar = ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']`
			`let decimal_literal =`
			`['0'-'9'] ['0'-'9' '_']*`
			`let hex_literal =`
			`'0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*`
			`let oct_literal =`
			`'0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*`
			`let bin_literal =`
			`'0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*`
			`let int_literal =`
			`'-' ? ( decimal_literal \| hex_literal \| oct_literal \| bin_literal)`

			`rule indent_tokens st nl = parse`

			`\| space { indent_tokens st nl lexbuf }`
			`\| newline { Lexing.new_line lexbuf; indent_tokens st (nl + 1) lexbuf }`

			`\| ""`
			`{ let bol = nl <> 0 \|\| first_token st in`
			`if bol then indent_token st (curr_location lexbuf) else [] }`

			`\| "/*"`
			`{ st.comment_start_loc <- [curr_location lexbuf];`
			`comment st nl lexbuf }`

			`\| ('{' \| '(' as opener)`
			`{ let opener_loc = curr_location lexbuf in`
			`let token_offset = next_token_indent st lexbuf in`
			`let bol = nl <> 0 \|\| first_token st in`
			`let prefix =`
			`if bol then indent_token st opener_loc else [] in`
			`prefix @ open_block st opener opener_loc token_offset }`

			`\| ('}' \| ')' as closer)`
			`{ let closer_loc = curr_location lexbuf in`
			`let bol = Compare.Int.(nl <> 0) in`
			`close_block st bol closer closer_loc }`

			`\| eof`
			`{ List.map`
			`(function`
			\| (_, `Indent) -> DEDENT
			\| (_, `Open (c, loc)) ->
			`raise @@ Unclosed (pos2 (curr_location lexbuf), c, pos2 loc))`
			`st.indent_stack`
			`@ [EOF]`
			`}`

			`and comment st nl = parse`

			`\| "/*" { st.comment_start_loc <-`
			`curr_location lexbuf :: st.comment_start_loc;`
			`comment st nl lexbuf }`

			`\| "*/" { match st.comment_start_loc with`
			`\| [] -> assert false`
			`\| [_] -> indent_tokens st nl lexbuf`
			`\| _ :: xs -> st.comment_start_loc <- xs; comment st nl lexbuf }`

			`\| "\"" { st.string_start_loc <- curr_location lexbuf;`
			`let nl =`
			`try string st nl lexbuf`
			`with Unterminated_string str_start ->`
			`match st.comment_start_loc with`
			`\| [] -> assert false`
			`\| loc :: _ ->`
			`let start = List.hd (List.rev st.comment_start_loc) in`
			`raise @@`
			`Unterminated_string_in_comment (pos2 loc, pos2 start, str_start)`
			`in`
			`comment st nl lexbuf }`

			`\| newline { Lexing.new_line lexbuf; comment st (nl+1) lexbuf }`

			`\| eof { match st.comment_start_loc with`
			`\| [] -> assert false`
			`\| loc :: _ ->`
			`let start = List.hd (List.rev st.comment_start_loc) in`
			`raise @@ Unterminated_comment (pos2 loc, pos2 start) }`

			`\| _ { comment st nl lexbuf }`


			`(** Eat spacings and return the next token offset. *)`
			`and next_token_indent st = parse`

			`\| space { next_token_indent st lexbuf }`

			`\| newline { Lexing.new_line lexbuf; next_token_indent st lexbuf }`

			`\| "" { end_offset lexbuf }`

			`(** The lexer for non-indentation tokens.`
			`It should not care about 'space', 'newline', '{}()' nor comments. *)`
			`and raw_token st = parse`

			`\| ";" { SEMICOLON }`

			`\| firstidentchar identchar *`
Michelson: enforce case sensitivity. 2016-12-14 21:05:09 +04:00			`{ PRIM (Lexing.lexeme lexbuf) }`
First public release 2016-09-08 21:13:10 +04:00
			`\| int_literal`
			`{ INT (Lexing.lexeme lexbuf) }`

			`\| "\""`
			`{ reset_string_buffer st;`
			`let string_start = lexbuf.Lexing.lex_start_p in`
			`st.string_start_loc <- curr_location lexbuf;`
			`ignore (string st 0 lexbuf);`
			`lexbuf.Lexing.lex_start_p <- string_start;`
			`STRING (get_stored_string st) }`

			`\| _`
			`{ raise (Illegal_character (pos2 (curr_location lexbuf),`
			`Lexing.lexeme_char lexbuf 0))`
			`}`

			`and string st nl = parse`
			`'"'`
			`{ nl }`
			`\| '\\' newline ([' ' '\t'] * as space)`
			`{ update_loc lexbuf 1 (String.length space);`
			`string st nl lexbuf`
			`}`
			`\| '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']`
			`{ store_string_char st (char_for_backslash(Lexing.lexeme_char lexbuf 1));`
			`string st nl lexbuf }`
			`\| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']`
			`{ store_string_char st (char_for_decimal_code lexbuf 1);`
			`string st nl lexbuf }`
			`\| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']`
			`{ store_string_char st (char_for_hexadecimal_code lexbuf 2);`
			`string st nl lexbuf }`
			`\| newline`
			`{ match st.comment_start_loc with`
			`\| [] -> raise @@ Newline_in_string (pos2 (curr_location lexbuf))`
			`\| _ -> Lexing.new_line lexbuf; string st (nl+1) lexbuf }`
			`\| eof`
			`{ raise @@ Unterminated_string (pos2 st.string_start_loc) }`
			`\| _`
			`{ store_string_char st (Lexing.lexeme_char lexbuf 0);`
			`string st nl lexbuf }`


			`{`

			`let rec token st lexbuf =`
			`match st.buffer with`
			`\| tok :: tokens ->`
			`st.buffer <- tokens;`
			`tok`
			`\| [] ->`
			`match indent_tokens st 0 lexbuf with`
			`\| [] -> raw_token st lexbuf`
			`\| _ :: _ as tokens -> st.buffer <- tokens; token st lexbuf`

			`}`