From f3777b4af8458d50e0cb2170b964352bf2064533 Mon Sep 17 00:00:00 2001 From: Christian Rinderknecht Date: Fri, 24 Apr 2020 21:06:18 +0200 Subject: [PATCH] The preprocessor library depends now on the kinds of comments instead of a closed set of languages. I also removed the offsets: I simply use the current region to determine whether the preprocessing directie starts at the beginning of a line. I also removed scanning line indicators, to make the lexer simpler. LexToken.mll: Moved the function [check_right_context] that checks stylistic constraints from Lexer.mll to LexToken.mll. While this triplicates code (as CameLIGO, PascaLIGO and ReasonLIGO share the same constraints), the benefit is that Lexer.mll becomes more generic and the signature for the TOKEN module is simpler (no more exporting predicates, except for EOF). In accordance with the change of the preprocessor, the lexers and parsers for LIGO now depend on the kind of comments, not a fixed set of syntaxes. This gives more versatility when adding a new language: only the kinds of its comments are needed, although Lexer.mll and Preproc.mll may have to be modified if they do not already know the comment delimiters, for example line comments starting with #. **************************************************************** BUG: The exceptions coming from LexToken.mll when a stylistic constraint is broken in [LexToken.check_right_context] are not caught yet. **************************************************************** Lexer.mll: I moved out as much as I could from the header into a new module LexerLib. The aim is to make it easy to reuse as much as possible of the lexer machinerie, when it cannot be used as is. --- src/passes/1-parser/cameligo.ml | 30 +- src/passes/1-parser/cameligo/LexToken.mli | 25 +- src/passes/1-parser/cameligo/LexToken.mll | 115 +-- src/passes/1-parser/cameligo/LexerMain.ml | 7 +- src/passes/1-parser/cameligo/ParserMain.ml | 14 +- src/passes/1-parser/pascaligo.ml | 30 +- src/passes/1-parser/pascaligo/.links | 2 + src/passes/1-parser/pascaligo/LexToken.mli | 25 +- src/passes/1-parser/pascaligo/LexToken.mll | 154 ++-- src/passes/1-parser/pascaligo/LexerMain.ml | 5 +- src/passes/1-parser/pascaligo/ParserMain.ml | 14 +- src/passes/1-parser/reasonligo.ml | 30 +- src/passes/1-parser/reasonligo/LexToken.mli | 25 +- src/passes/1-parser/reasonligo/LexToken.mll | 109 +-- src/passes/1-parser/reasonligo/LexerMain.ml | 7 +- src/passes/1-parser/reasonligo/Parser.mly | 44 +- src/passes/1-parser/reasonligo/ParserMain.ml | 14 +- src/passes/1-parser/shared/.links | 5 - src/passes/1-parser/shared/EvalOpt.ml | 49 +- src/passes/1-parser/shared/EvalOpt.mli | 26 +- src/passes/1-parser/shared/Lexer.mli | 88 +- src/passes/1-parser/shared/Lexer.mll | 852 ++++++------------- src/passes/1-parser/shared/LexerLib.ml | 451 ++++++++++ src/passes/1-parser/shared/LexerLog.ml | 11 +- src/passes/1-parser/shared/LexerLog.mli | 7 +- src/passes/1-parser/shared/LexerUnit.ml | 65 +- src/passes/1-parser/shared/LexerUnit.mli | 2 +- src/passes/1-parser/shared/ParserAPI.ml | 8 +- src/passes/1-parser/shared/ParserAPI.mli | 2 +- src/passes/1-parser/shared/ParserUnit.ml | 26 +- src/passes/1-parser/shared/ParserUnit.mli | 21 +- src/passes/1-parser/shared/dune | 1 + 32 files changed, 1193 insertions(+), 1071 deletions(-) create mode 100644 src/passes/1-parser/shared/LexerLib.ml diff --git a/src/passes/1-parser/cameligo.ml b/src/passes/1-parser/cameligo.ml index 3ae2063c1..79093af97 100644 --- a/src/passes/1-parser/cameligo.ml +++ b/src/passes/1-parser/cameligo.ml @@ -8,15 +8,14 @@ module SSet = Set.Make (String) (* Mock IOs TODO: Fill them with CLI options *) -type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO] - module SubIO = struct type options = < libs : string list; verbose : SSet.t; offsets : bool; - lang : language; + block : EvalOpt.block_comment option; + line : EvalOpt.line_comment option; ext : string; (* ".mligo" *) mode : [`Byte | `Point]; cmd : EvalOpt.command; @@ -24,22 +23,25 @@ module SubIO = > let options : options = - object - method libs = [] - method verbose = SSet.empty - method offsets = true - method lang = `CameLIGO - method ext = ".mligo" - method mode = `Point - method cmd = EvalOpt.Quiet - method mono = false - end + let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)" + in object + method libs = [] + method verbose = SSet.empty + method offsets = true + method block = Some block + method line = Some "//" + method ext = ".mligo" + method mode = `Point + method cmd = EvalOpt.Quiet + method mono = false + end let make = EvalOpt.make ~libs:options#libs ~verbose:options#verbose ~offsets:options#offsets - ~lang:options#lang + ?block:options#block + ?line:options#line ~ext:options#ext ~mode:options#mode ~cmd:options#cmd diff --git a/src/passes/1-parser/cameligo/LexToken.mli b/src/passes/1-parser/cameligo/LexToken.mli index 26cd6416a..4af326823 100644 --- a/src/passes/1-parser/cameligo/LexToken.mli +++ b/src/passes/1-parser/cameligo/LexToken.mli @@ -156,11 +156,22 @@ val eof : Region.t -> token (* Predicates *) -val is_string : token -> bool -val is_bytes : token -> bool -val is_int : token -> bool -val is_ident : token -> bool -val is_kwd : token -> bool -val is_constr : token -> bool -val is_sym : token -> bool val is_eof : token -> bool + +(* Style *) + +type error + +val error_to_string : error -> string + +exception Error of error Region.reg + +val format_error : + ?offsets:bool -> [`Byte | `Point] -> + error Region.reg -> file:bool -> string Region.reg + +val check_right_context : + token -> + (Lexing.lexbuf -> (Markup.t list * token) option) -> + Lexing.lexbuf -> + unit diff --git a/src/passes/1-parser/cameligo/LexToken.mll b/src/passes/1-parser/cameligo/LexToken.mll index a9dc9cfe1..bb44c4e78 100644 --- a/src/passes/1-parser/cameligo/LexToken.mll +++ b/src/passes/1-parser/cameligo/LexToken.mll @@ -480,81 +480,60 @@ let mk_attr header lexeme region = (* Predicates *) -let is_string = function - String _ -> true -| _ -> false +let is_string = function String _ -> true | _ -> false +let is_bytes = function Bytes _ -> true | _ -> false +let is_int = function Int _ -> true | _ -> false +let is_ident = function Ident _ -> true | _ -> false +let is_eof = function EOF _ -> true | _ -> false -let is_bytes = function - Bytes _ -> true -| _ -> false +(* Errors *) -let is_int = function - Int _ -> true -| _ -> false +type error = + Odd_lengthed_bytes +| Missing_break -let is_ident = function - Ident _ -> true -| _ -> false +let error_to_string = function + Odd_lengthed_bytes -> + "The length of the byte sequence is an odd number.\n\ + Hint: Add or remove a digit." +| Missing_break -> + "Missing break.\n\ + Hint: Insert some space." -let is_kwd = function - | Begin _ - | Else _ - | End _ - | False _ - | Fun _ - | Rec _ - | If _ - | In _ - | Let _ - | Match _ - | Mod _ - | Not _ - | Of _ - | Or _ - | Then _ - | True _ - | Type _ - | With _ -> true - | _ -> false +exception Error of error Region.reg -let is_constr = function -| Constr _ -| Ident _ -| False _ -| True _ -> true -| _ -> false +let format_error ?(offsets=true) mode Region.{region; value} ~file = + let msg = error_to_string value + and reg = region#to_string ~file ~offsets mode in + let value = sprintf "Lexical error %s:\n%s\n" reg msg + in Region.{value; region} -let is_sym = function -| ARROW _ -| CONS _ -| CAT _ -| MINUS _ -| PLUS _ -| SLASH _ -| TIMES _ -| LPAR _ -| RPAR _ -| LBRACKET _ -| RBRACKET _ -| LBRACE _ -| RBRACE _ -| COMMA _ -| SEMI _ -| VBAR _ -| COLON _ -| DOT _ -| WILD _ -| EQ _ -| NE _ -| LT _ -| GT _ -| LE _ -| GE _ -| BOOL_OR _ -| BOOL_AND _ -> true -| _ -> false +let fail region value = raise (Error Region.{region; value}) -let is_eof = function EOF _ -> true | _ -> false +let check_right_context token next_token buffer : unit = + if not (is_eof token) then + if is_int token || is_bytes token then + match next_token buffer with + Some ([], next) -> + let pos = (to_region token)#stop in + let region = Region.make ~start:pos ~stop:pos in + if is_int next then + fail region Odd_lengthed_bytes + else + if is_ident next || is_string next || is_bytes next then + fail region Missing_break + | Some (_::_, _) | None -> () + else + if is_ident token || is_string token then + match next_token buffer with + Some ([], next) -> + if is_ident next || is_string next + || is_bytes next || is_int next + then + let pos = (to_region token)#stop in + let region = Region.make ~start:pos ~stop:pos + in fail region Missing_break + | Some (_::_, _) | None -> () (* END TRAILER *) } diff --git a/src/passes/1-parser/cameligo/LexerMain.ml b/src/passes/1-parser/cameligo/LexerMain.ml index 2a281efd5..498098035 100644 --- a/src/passes/1-parser/cameligo/LexerMain.ml +++ b/src/passes/1-parser/cameligo/LexerMain.ml @@ -4,7 +4,10 @@ module Region = Simple_utils.Region module IO = struct - let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo") + let options = + let open EvalOpt in + let block = mk_block ~opening:"(*" ~closing:"*)" + in read ~block ~line:"//" ".mligo" end module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) @@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) let () = match M.trace () with Stdlib.Ok () -> () - | Error Region.{value; _} -> Utils.highlight value + | Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value diff --git a/src/passes/1-parser/cameligo/ParserMain.ml b/src/passes/1-parser/cameligo/ParserMain.ml index bc47d9199..a3d13f3cc 100644 --- a/src/passes/1-parser/cameligo/ParserMain.ml +++ b/src/passes/1-parser/cameligo/ParserMain.ml @@ -5,7 +5,10 @@ module SSet = Set.Make (String) module IO = struct - let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo") + let options = + let open EvalOpt in + let block = mk_block ~opening:"(*" ~closing:"*)" + in read ~block ~line:"//" ".mligo" end module SubIO = @@ -14,7 +17,8 @@ module SubIO = libs : string list; verbose : SSet.t; offsets : bool; - lang : EvalOpt.language; + block : EvalOpt.block_comment option; + line : EvalOpt.line_comment option; ext : string; mode : [`Byte | `Point]; cmd : EvalOpt.command; @@ -26,7 +30,8 @@ module SubIO = method libs = IO.options#libs method verbose = IO.options#verbose method offsets = IO.options#offsets - method lang = IO.options#lang + method block = IO.options#block + method line = IO.options#line method ext = IO.options#ext method mode = IO.options#mode method cmd = IO.options#cmd @@ -37,7 +42,8 @@ module SubIO = EvalOpt.make ~libs:options#libs ~verbose:options#verbose ~offsets:options#offsets - ~lang:options#lang + ?block:options#block + ?line:options#line ~ext:options#ext ~mode:options#mode ~cmd:options#cmd diff --git a/src/passes/1-parser/pascaligo.ml b/src/passes/1-parser/pascaligo.ml index 6e4759fe8..02b8f462e 100644 --- a/src/passes/1-parser/pascaligo.ml +++ b/src/passes/1-parser/pascaligo.ml @@ -8,15 +8,14 @@ module SSet = Set.Make (String) (* Mock IOs TODO: Fill them with CLI options *) -type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO] - module SubIO = struct type options = < libs : string list; verbose : SSet.t; offsets : bool; - lang : language; + block : EvalOpt.block_comment option; + line : EvalOpt.line_comment option; ext : string; (* ".ligo" *) mode : [`Byte | `Point]; cmd : EvalOpt.command; @@ -24,22 +23,25 @@ module SubIO = > let options : options = - object - method libs = [] - method verbose = SSet.empty - method offsets = true - method lang = `PascaLIGO - method ext = ".ligo" - method mode = `Point - method cmd = EvalOpt.Quiet - method mono = false - end + let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)" + in object + method libs = [] + method verbose = SSet.empty + method offsets = true + method block = Some block + method line = Some "//" + method ext = ".ligo" + method mode = `Point + method cmd = EvalOpt.Quiet + method mono = false + end let make = EvalOpt.make ~libs:options#libs ~verbose:options#verbose ~offsets:options#offsets - ~lang:options#lang + ?block:options#block + ?line:options#line ~ext:options#ext ~mode:options#mode ~cmd:options#cmd diff --git a/src/passes/1-parser/pascaligo/.links b/src/passes/1-parser/pascaligo/.links index 0b836a2d9..fed453f99 100644 --- a/src/passes/1-parser/pascaligo/.links +++ b/src/passes/1-parser/pascaligo/.links @@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile ../shared/Lexer.mli ../shared/Lexer.mll +../shared/LexerLib.ml ../shared/EvalOpt.ml ../shared/EvalOpt.mli ../shared/FQueue.ml @@ -18,5 +19,6 @@ $HOME/git/OCaml-build/Makefile ../shared/LexerUnit.ml ../shared/ParserUnit.mli ../shared/ParserUnit.ml +../shared/LexerLib.ml $HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml diff --git a/src/passes/1-parser/pascaligo/LexToken.mli b/src/passes/1-parser/pascaligo/LexToken.mli index 60479955f..88adbff37 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mli +++ b/src/passes/1-parser/pascaligo/LexToken.mli @@ -163,11 +163,22 @@ val eof : Region.t -> token (* Predicates *) -val is_string : token -> bool -val is_bytes : token -> bool -val is_int : token -> bool -val is_ident : token -> bool -val is_kwd : token -> bool -val is_constr : token -> bool -val is_sym : token -> bool val is_eof : token -> bool + +(* Style *) + +type error + +val error_to_string : error -> string + +exception Error of error Region.reg + +val format_error : + ?offsets:bool -> [`Byte | `Point] -> + error Region.reg -> file:bool -> string Region.reg + +val check_right_context : + token -> + (Lexing.lexbuf -> (Markup.t list * token) option) -> + Lexing.lexbuf -> + unit diff --git a/src/passes/1-parser/pascaligo/LexToken.mll b/src/passes/1-parser/pascaligo/LexToken.mll index 95035cf37..f300187fe 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mll +++ b/src/passes/1-parser/pascaligo/LexToken.mll @@ -483,17 +483,16 @@ type nat_err = | Non_canonical_zero_nat let mk_nat lexeme region = - match (String.index_opt lexeme 'n') with - | None -> Error Invalid_natural - | Some _ -> ( - let z = - Str.(global_replace (regexp "_") "" lexeme) |> - Str.(global_replace (regexp "n") "") |> - Z.of_string in - if Z.equal z Z.zero && lexeme <> "0n" - then Error Non_canonical_zero_nat - else Ok (Nat Region.{region; value = lexeme,z}) - ) + match String.index_opt lexeme 'n' with + None -> Error Invalid_natural + | Some _ -> + let z = + Str.(global_replace (regexp "_") "" lexeme) |> + Str.(global_replace (regexp "n") "") |> + Z.of_string in + if Z.equal z Z.zero && lexeme <> "0n" + then Error Non_canonical_zero_nat + else Ok (Nat Region.{region; value = lexeme,z}) let mk_mutez lexeme region = let z = @@ -563,99 +562,60 @@ let mk_attr _header _string _region = (* Predicates *) -let is_string = function - String _ -> true -| _ -> false +let is_string = function String _ -> true | _ -> false +let is_bytes = function Bytes _ -> true | _ -> false +let is_int = function Int _ -> true | _ -> false +let is_ident = function Ident _ -> true | _ -> false +let is_eof = function EOF _ -> true | _ -> false -let is_bytes = function - Bytes _ -> true -| _ -> false +(* Errors *) -let is_int = function - Int _ -> true -| _ -> false +type error = + Odd_lengthed_bytes +| Missing_break -let is_ident = function - Ident _ -> true -| _ -> false +let error_to_string = function + Odd_lengthed_bytes -> + "The length of the byte sequence is an odd number.\n\ + Hint: Add or remove a digit." +| Missing_break -> + "Missing break.\n\ + Hint: Insert some space." -let is_kwd = function - And _ -| Attributes _ -| Begin _ -| BigMap _ -| Block _ -| Case _ -| Const _ -| Contains _ -| Else _ -| End _ -| False _ -| For _ -| From _ -| Function _ -| If _ -| In _ -| Is _ -| List _ -| Map _ -| Mod _ -| Nil _ -| Not _ -| Of _ -| Or _ -| Patch _ -| Record _ -| Remove _ -| Set _ -| Skip _ -| Step _ -| Then _ -| To _ -| True _ -| Type _ -| Unit _ -| Var _ -| While _ -| With _ -> true -| _ -> false +exception Error of error Region.reg -let is_constr = function - Constr _ -| C_None _ -| C_Some _ -> true -| _ -> false +let format_error ?(offsets=true) mode Region.{region; value} ~file = + let msg = error_to_string value + and reg = region#to_string ~file ~offsets mode in + let value = sprintf "Lexical error %s:\n%s\n" reg msg + in Region.{value; region} -let is_sym = function - SEMI _ -| COMMA _ -| LPAR _ -| RPAR _ -| LBRACE _ -| RBRACE _ -| LBRACKET _ -| RBRACKET _ -| CONS _ -| VBAR _ -| ARROW _ -| ASS _ -| EQ _ -| COLON _ -| LT _ -| LE _ -| GT _ -| GE _ -| NE _ -| PLUS _ -| MINUS _ -| SLASH _ -| TIMES _ -| DOT _ -| WILD _ -| CAT _ -> true -| _ -> false +let fail region value = raise (Error Region.{region; value}) -let is_eof = function EOF _ -> true | _ -> false +let check_right_context token next_token buffer : unit = + if not (is_eof token) then + if is_int token || is_bytes token then + match next_token buffer with + Some ([], next) -> + let pos = (to_region token)#stop in + let region = Region.make ~start:pos ~stop:pos in + if is_int next then + fail region Odd_lengthed_bytes + else + if is_ident next || is_string next || is_bytes next then + fail region Missing_break + | Some (_::_, _) | None -> () + else + if is_ident token || is_string token then + match next_token buffer with + Some ([], next) -> + if is_ident next || is_string next + || is_bytes next || is_int next + then + let pos = (to_region token)#stop in + let region = Region.make ~start:pos ~stop:pos + in fail region Missing_break + | Some (_::_, _) | None -> () (* END TRAILER *) } diff --git a/src/passes/1-parser/pascaligo/LexerMain.ml b/src/passes/1-parser/pascaligo/LexerMain.ml index 3c8d7c642..a497df466 100644 --- a/src/passes/1-parser/pascaligo/LexerMain.ml +++ b/src/passes/1-parser/pascaligo/LexerMain.ml @@ -4,7 +4,10 @@ module Region = Simple_utils.Region module IO = struct - let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo") + let options = + let open EvalOpt in + let block = mk_block ~opening:"(*" ~closing:"*)" + in read ~block ~line:"//" ".ligo" end module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) diff --git a/src/passes/1-parser/pascaligo/ParserMain.ml b/src/passes/1-parser/pascaligo/ParserMain.ml index c94ca806d..beb0b4885 100644 --- a/src/passes/1-parser/pascaligo/ParserMain.ml +++ b/src/passes/1-parser/pascaligo/ParserMain.ml @@ -5,7 +5,10 @@ module SSet = Set.Make (String) module IO = struct - let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo") + let options = + let open EvalOpt in + let block = mk_block ~opening:"(*" ~closing:"*)" + in read ~block ~line:"//" ".ligo" end module SubIO = @@ -14,7 +17,8 @@ module SubIO = libs : string list; verbose : SSet.t; offsets : bool; - lang : EvalOpt.language; + block : EvalOpt.block_comment option; + line : EvalOpt.line_comment option; ext : string; mode : [`Byte | `Point]; cmd : EvalOpt.command; @@ -26,7 +30,8 @@ module SubIO = method libs = IO.options#libs method verbose = IO.options#verbose method offsets = IO.options#offsets - method lang = IO.options#lang + method block = IO.options#block + method line = IO.options#line method ext = IO.options#ext method mode = IO.options#mode method cmd = IO.options#cmd @@ -37,7 +42,8 @@ module SubIO = EvalOpt.make ~libs:options#libs ~verbose:options#verbose ~offsets:options#offsets - ~lang:options#lang + ?block:options#block + ?line:options#line ~ext:options#ext ~mode:options#mode ~cmd:options#cmd diff --git a/src/passes/1-parser/reasonligo.ml b/src/passes/1-parser/reasonligo.ml index 85f9557e4..1af70c927 100644 --- a/src/passes/1-parser/reasonligo.ml +++ b/src/passes/1-parser/reasonligo.ml @@ -11,15 +11,14 @@ module SSet = Set.Make (String) (* Mock IOs TODO: Fill them with CLI options *) -type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO] - module SubIO = struct type options = < libs : string list; verbose : SSet.t; offsets : bool; - lang : language; + block : EvalOpt.block_comment option; + line : EvalOpt.line_comment option; ext : string; (* ".religo" *) mode : [`Byte | `Point]; cmd : EvalOpt.command; @@ -27,22 +26,25 @@ module SubIO = > let options : options = - object - method libs = [] - method verbose = SSet.empty - method offsets = true - method lang = `ReasonLIGO - method ext = ".religo" - method mode = `Point - method cmd = EvalOpt.Quiet - method mono = false - end + let block = EvalOpt.mk_block ~opening:"/*" ~closing:"*/" + in object + method libs = [] + method verbose = SSet.empty + method offsets = true + method block = Some block + method line = Some "//" + method ext = ".religo" + method mode = `Point + method cmd = EvalOpt.Quiet + method mono = false + end let make = EvalOpt.make ~libs:options#libs ~verbose:options#verbose ~offsets:options#offsets - ~lang:options#lang + ?block:options#block + ?line:options#line ~ext:options#ext ~mode:options#mode ~cmd:options#cmd diff --git a/src/passes/1-parser/reasonligo/LexToken.mli b/src/passes/1-parser/reasonligo/LexToken.mli index 4a70d9d13..240e824d2 100644 --- a/src/passes/1-parser/reasonligo/LexToken.mli +++ b/src/passes/1-parser/reasonligo/LexToken.mli @@ -154,11 +154,22 @@ val eof : Region.t -> token (* Predicates *) -val is_string : token -> bool -val is_bytes : token -> bool -val is_int : token -> bool -val is_ident : token -> bool -val is_kwd : token -> bool -val is_constr : token -> bool -val is_sym : token -> bool val is_eof : token -> bool + +(* Style *) + +type error + +val error_to_string : error -> string + +exception Error of error Region.reg + +val format_error : + ?offsets:bool -> [`Byte | `Point] -> + error Region.reg -> file:bool -> string Region.reg + +val check_right_context : + token -> + (Lexing.lexbuf -> (Markup.t list * token) option) -> + Lexing.lexbuf -> + unit diff --git a/src/passes/1-parser/reasonligo/LexToken.mll b/src/passes/1-parser/reasonligo/LexToken.mll index cd05538e7..92aec58e3 100644 --- a/src/passes/1-parser/reasonligo/LexToken.mll +++ b/src/passes/1-parser/reasonligo/LexToken.mll @@ -460,75 +460,60 @@ let mk_attr header lexeme region = (* Predicates *) -let is_string = function - String _ -> true -| _ -> false +let is_string = function String _ -> true | _ -> false +let is_bytes = function Bytes _ -> true | _ -> false +let is_int = function Int _ -> true | _ -> false +let is_ident = function Ident _ -> true | _ -> false +let is_eof = function EOF _ -> true | _ -> false -let is_bytes = function - Bytes _ -> true -| _ -> false +(* Errors *) -let is_int = function - Int _ -> true -| _ -> false +type error = + Odd_lengthed_bytes +| Missing_break -let is_ident = function - Ident _ -> true -| _ -> false +let error_to_string = function + Odd_lengthed_bytes -> + "The length of the byte sequence is an odd number.\n\ + Hint: Add or remove a digit." +| Missing_break -> + "Missing break.\n\ + Hint: Insert some space." -let is_kwd = function - Else _ -| False _ -| If _ -| Let _ -| Rec _ -| Switch _ -| Mod _ -| Or _ -| True _ -| Type _ -> true -| _ -> false +exception Error of error Region.reg -let is_constr = function - Constr _ -| Ident _ -| False _ -| True _ -> true -| _ -> false +let format_error ?(offsets=true) mode Region.{region; value} ~file = + let msg = error_to_string value + and reg = region#to_string ~file ~offsets mode in + let value = sprintf "Lexical error %s:\n%s\n" reg msg + in Region.{value; region} -let is_sym = function - CAT _ -| MINUS _ -| PLUS _ -| SLASH _ -| TIMES _ -| LPAR _ -| RPAR _ -| LBRACKET _ -| RBRACKET _ -| LBRACE _ -| RBRACE _ -| COMMA _ -| SEMI _ -| VBAR _ -| COLON _ -| DOT _ -| ELLIPSIS _ -| WILD _ -| EQ _ -| EQEQ _ -| NE _ -| LT _ -| GT _ -| LE _ -| GE _ -| ARROW _ -| BOOL_OR _ -| NOT _ -| BOOL_AND _ -> true -| _ -> false +let fail region value = raise (Error Region.{region; value}) -let is_eof = function EOF _ -> true | _ -> false +let check_right_context token next_token buffer : unit = + if not (is_eof token) then + if is_int token || is_bytes token then + match next_token buffer with + Some ([], next) -> + let pos = (to_region token)#stop in + let region = Region.make ~start:pos ~stop:pos in + if is_int next then + fail region Odd_lengthed_bytes + else + if is_ident next || is_string next || is_bytes next then + fail region Missing_break + | Some (_::_, _) | None -> () + else + if is_ident token || is_string token then + match next_token buffer with + Some ([], next) -> + if is_ident next || is_string next + || is_bytes next || is_int next + then + let pos = (to_region token)#stop in + let region = Region.make ~start:pos ~stop:pos + in fail region Missing_break + | Some (_::_, _) | None -> () (* END TRAILER *) } diff --git a/src/passes/1-parser/reasonligo/LexerMain.ml b/src/passes/1-parser/reasonligo/LexerMain.ml index 4f063582f..f0d6fdfa1 100644 --- a/src/passes/1-parser/reasonligo/LexerMain.ml +++ b/src/passes/1-parser/reasonligo/LexerMain.ml @@ -4,7 +4,10 @@ module Region = Simple_utils.Region module IO = struct - let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo") + let options = + let open EvalOpt in + let block = mk_block ~opening:"/*" ~closing:"*/" + in read ~block ~line:"//" ".religo" end module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) @@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) let () = match M.trace () with Stdlib.Ok () -> () - | Error Region.{value; _} -> Utils.highlight value + | Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value diff --git a/src/passes/1-parser/reasonligo/Parser.mly b/src/passes/1-parser/reasonligo/Parser.mly index 7a1c02ada..9e6e77799 100644 --- a/src/passes/1-parser/reasonligo/Parser.mly +++ b/src/passes/1-parser/reasonligo/Parser.mly @@ -40,11 +40,11 @@ let rec curry hd = function in TFun {value; region} | [] -> hd -let wild_error e = - match e with - | EVar { value = "_"; _} as e -> +let wild_error e = + match e with + | EVar { value = "_"; _} as e -> let open! SyntaxError in - raise (Error (InvalidWild e)) + raise (Error (InvalidWild e)) | _ -> () (* END HEADER *) @@ -270,30 +270,30 @@ let_declaration: let_binding: "" type_annotation? "=" expr { - wild_error $4; + wild_error $4; Scoping.check_reserved_name $1; {binders = PVar $1, []; lhs_type=$2; eq=$3; let_rhs=$4} } | "_" type_annotation? "=" expr { - wild_error $4; + wild_error $4; {binders = PWild $1, []; lhs_type=$2; eq=$3; let_rhs=$4} } | unit type_annotation? "=" expr { - wild_error $4; + wild_error $4; {binders = PUnit $1, []; lhs_type=$2; eq=$3; let_rhs=$4} } | record_pattern type_annotation? "=" expr { - wild_error $4; + wild_error $4; Scoping.check_pattern (PRecord $1); {binders = PRecord $1, []; lhs_type=$2; eq=$3; let_rhs=$4} } | par(closed_irrefutable) type_annotation? "=" expr { - wild_error $4; + wild_error $4; Scoping.check_pattern $1.value.inside; {binders = PPar $1, []; lhs_type=$2; eq=$3; let_rhs=$4} } | tuple(sub_irrefutable) type_annotation? "=" expr { - wild_error $4; + wild_error $4; Utils.nsepseq_iter Scoping.check_pattern $1; let hd, tl = $1 in let start = pattern_to_region hd in @@ -422,7 +422,7 @@ expr: base_cond__open(expr) | switch_expr(base_cond) { $1 } base_cond__open(x): - base_expr(x) | conditional(expr_with_let_expr) { + base_expr(x) | conditional(expr_with_let_expr) { wild_error $1; $1 } @@ -460,11 +460,11 @@ fun_expr: let region = cover start stop in let rec arg_to_pattern = function - EVar v -> + EVar v -> if v.value = "_" then PWild v.region else ( - Scoping.check_reserved_name v; + Scoping.check_reserved_name v; PVar v ) | EAnnot {region; value = {inside = EVar v, colon, typ; _}} -> @@ -895,7 +895,7 @@ path: | projection { Path $1 } update_record: - "{""..."path "," sep_or_term_list(field_path_assignment,",") "}" { + "{" "..." path "," sep_or_term_list(field_path_assignment,",") "}" { let region = cover $1 $6 in let ne_elements, terminator = $5 in let value = { @@ -913,7 +913,7 @@ expr_with_let_expr: expr { $1 } | let_expr(expr_with_let_expr) { $1 } -more_field_assignments: +more_field_assignments: "," sep_or_term_list(field_assignment_punning,",") { let elts, _region = $2 in $1, elts @@ -926,9 +926,9 @@ sequence_or_record_in: PaSequence {s_elts = elts; s_terminator=None} } | field_assignment more_field_assignments? { - match $2 with - | Some (comma, elts) -> - let r_elts = Utils.nsepseq_cons $1 comma elts in + match $2 with + | Some (comma, elts) -> + let r_elts = Utils.nsepseq_cons $1 comma elts in PaRecord {r_elts; r_terminator = None} | None -> PaRecord {r_elts = ($1, []); r_terminator = None} @@ -950,15 +950,15 @@ sequence_or_record: terminator = r.r_terminator} in ERecord {region; value}} -field_assignment_punning: - (* This can only happen with multiple fields - +field_assignment_punning: + (* This can only happen with multiple fields - one item punning does NOT work in ReasonML *) - field_name { + field_name { let value = { field_name = $1; assignment = ghost; field_expr = EVar $1 } - in + in {$1 with value} } | field_assignment { diff --git a/src/passes/1-parser/reasonligo/ParserMain.ml b/src/passes/1-parser/reasonligo/ParserMain.ml index 82ffc7b32..1c173bee0 100644 --- a/src/passes/1-parser/reasonligo/ParserMain.ml +++ b/src/passes/1-parser/reasonligo/ParserMain.ml @@ -5,7 +5,10 @@ module SSet = Set.Make (String) module IO = struct - let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo") + let options = + let open EvalOpt in + let block = mk_block ~opening:"/*" ~closing:"*/" + in read ~block ~line:"//" ".religo" end module SubIO = @@ -14,7 +17,8 @@ module SubIO = libs : string list; verbose : SSet.t; offsets : bool; - lang : EvalOpt.language; + block : EvalOpt.block_comment option; + line : EvalOpt.line_comment option; ext : string; mode : [`Byte | `Point]; cmd : EvalOpt.command; @@ -26,7 +30,8 @@ module SubIO = method libs = IO.options#libs method verbose = IO.options#verbose method offsets = IO.options#offsets - method lang = IO.options#lang + method block = IO.options#block + method line = IO.options#line method ext = IO.options#ext method mode = IO.options#mode method cmd = IO.options#cmd @@ -37,7 +42,8 @@ module SubIO = EvalOpt.make ~libs:options#libs ~verbose:options#verbose ~offsets:options#offsets - ~lang:options#lang + ?block:options#block + ?line:options#line ~ext:options#ext ~mode:options#mode ~cmd:options#cmd diff --git a/src/passes/1-parser/shared/.links b/src/passes/1-parser/shared/.links index df8a82cd9..b29b57639 100644 --- a/src/passes/1-parser/shared/.links +++ b/src/passes/1-parser/shared/.links @@ -1,7 +1,2 @@ $HOME/git/OCaml-build/Makefile $HOME/git/OCaml-build/Makefile.cfg - -$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli -$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml -$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli -$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml diff --git a/src/passes/1-parser/shared/EvalOpt.ml b/src/passes/1-parser/shared/EvalOpt.ml index 54d971846..8cb22608d 100644 --- a/src/passes/1-parser/shared/EvalOpt.ml +++ b/src/passes/1-parser/shared/EvalOpt.ml @@ -5,37 +5,42 @@ type command = Quiet | Copy | Units | Tokens -type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO] - -let lang_to_string = function - `PascaLIGO -> "PascaLIGO" -| `CameLIGO -> "CameLIGO" -| `ReasonLIGO -> "ReasonLIGO" - (* The type [options] gathers the command-line options. *) module SSet = Set.Make (String) +type line_comment = string (* Opening of a line comment *) +type block_comment = + +let mk_block ~opening ~closing : block_comment = + object + method opening = opening + method closing = closing + end + type options = < input : string option; libs : string list; verbose : SSet.t; offsets : bool; - lang : language; - ext : string; (* ".ligo", ".mligo", ".religo" *) + block : block_comment option; + line : line_comment option; + ext : string; mode : [`Byte | `Point]; cmd : command; mono : bool; expr : bool > -let make ~input ~libs ~verbose ~offsets ~lang ~ext ~mode ~cmd ~mono ~expr : options = +let make ~input ~libs ~verbose ~offsets ?block + ?line ~ext ~mode ~cmd ~mono ~expr : options = object method input = input method libs = libs method verbose = verbose method offsets = offsets - method lang = lang + method block = block + method line = line method ext = ext method mode = mode method cmd = cmd @@ -58,10 +63,10 @@ let abort msg = (* Help *) -let help language extension () = +let help extension () = let file = Filename.basename Sys.argv.(0) in printf "Usage: %s [