ligo/tools/lsp/squirrel/src/Parser.hs

422 lines
9.7 KiB
Haskell
Raw Normal View History

2020-04-30 14:39:51 +04:00
2020-06-01 22:02:16 +04:00
{- |
2020-06-04 13:48:04 +04:00
The thing that can untangle the mess that TreeSitter produces.
2020-05-08 01:18:26 +04:00
2020-06-01 22:02:16 +04:00
In presence of serious errors, it /will/ be a mess, anyway.
2020-05-08 01:18:26 +04:00
2020-06-01 22:02:16 +04:00
The AST you are building must be the @Tree@ in each point.
2020-05-08 01:18:26 +04:00
2020-06-01 22:02:16 +04:00
I recommend, in your tree-sitter grammar, to add `field("foo", ...)`
2020-05-08 01:18:26 +04:00
to each sub-rule, that has `$.` in front of it - in a rule, that doesn't
start with `_` in its name.
As a general rule of thumb, make each significant part a separate rule,
even if it is a keyword. Then, apply previous advice.
Only make rule start with `_` if it is a pure choice.
2020-06-01 22:02:16 +04:00
> ('block'
> ...
> a: <a>
> ...
> b: <b>
> ...)
2020-05-08 01:18:26 +04:00
->
2020-06-01 22:02:16 +04:00
> block = do
> subtree "block" do
> ranged do
> pure Block
> <*> inside "a" a
> <*> inside "b" b
2020-05-08 01:18:26 +04:00
-}
2020-05-08 22:14:09 +04:00
module Parser
2020-06-04 13:48:04 +04:00
( -- * Parser type
Parser
2020-05-08 22:14:09 +04:00
, runParser
, debugParser
2020-06-04 13:48:04 +04:00
-- * Combinators
2020-05-08 22:14:09 +04:00
, subtree
, anything
, token
2020-06-04 13:48:04 +04:00
, stubbed
2020-06-01 18:17:33 +04:00
, getInfo
2020-05-08 22:14:09 +04:00
, inside
, restart
2020-06-04 13:48:04 +04:00
2020-06-04 17:16:04 +04:00
-- * Error
, die
2020-06-04 13:48:04 +04:00
-- * Replacement for `Alternative`, because reasons
2020-05-08 22:14:09 +04:00
, many
, some
, (<|>)
, optional
, select
2020-06-04 13:48:04 +04:00
-- * Debug
2020-05-08 22:14:09 +04:00
, dump
2020-06-04 13:48:04 +04:00
-- * Comments and ranges
, ASTInfo(..)
2020-05-08 22:14:09 +04:00
) where
2020-04-30 14:39:51 +04:00
2020-06-09 15:56:11 +04:00
import Control.Monad.Writer hiding (Product)
import Control.Monad.State
import Control.Monad.Catch
import qualified Control.Monad.Reader as MTL
2020-04-30 14:39:51 +04:00
2020-06-04 17:40:38 +04:00
import Data.Functor ((<&>))
import Data.Foldable
import Data.IORef
2020-06-04 17:16:04 +04:00
import Data.Text (Text, unpack)
import qualified Data.Text as Text
2020-04-30 14:39:51 +04:00
import System.FilePath
2020-04-30 14:39:51 +04:00
import ParseTree
import Range
import Pretty
import Comment
2020-06-01 18:17:33 +04:00
import Error
2020-06-09 15:56:11 +04:00
import Product
2020-04-30 14:39:51 +04:00
2020-04-30 17:46:39 +04:00
import Debug.Trace
2020-04-30 14:39:51 +04:00
2020-05-08 00:52:33 +04:00
-- | Parser of tree-sitter-made tree.
2020-05-08 22:07:53 +04:00
--
-- TODO: separate state. Polysemy?
--
type Parser =
WriterT [Error ASTInfo]
(StateT (Product PList)
IO)
type PList = [ParseForest, [Text], FilePath]
-- | Auto-accumulated information to be put into AST being build.
type ASTInfo = Product [Range, [Text]]
runParser
:: Parser a
-> FilePath
-> IO (a, [Error ASTInfo])
runParser parser fin = do
pforest <- toParseTree fin
let dir = takeDirectory fin
runWriterT parser `evalStateT` Cons pforest (Cons [] (Cons dir Nil))
restart :: Parser a -> FilePath -> Parser a
restart p fin = do
dir <- get' @FilePath
(a, errs) <- liftIO do runParser p (dir </> fin)
tell errs
return a
get' :: forall x. Contains x PList => Parser x
get' = gets getElem
gets' :: forall x a. Contains x PList => (x -> a) -> Parser a
gets' f = gets (f . getElem)
put' :: forall x. Contains x PList => x -> Parser ()
put' x = modify $ modElem $ const x
mod' :: forall x. Contains x PList => (x -> x) -> Parser ()
mod' = modify . modElem
2020-04-30 14:39:51 +04:00
2020-05-08 00:52:33 +04:00
-- | Generate error originating at current location.
makeError :: Text -> Parser (Error ASTInfo)
2020-04-30 21:06:01 +04:00
makeError msg = do
rng <- getInfo
2020-04-30 21:06:01 +04:00
makeError' msg rng
2020-05-08 00:52:33 +04:00
-- | Generate error originating at given location.
makeError' :: Text -> info -> Parser (Error info)
makeError' msg i = do
src <- gets' pfGrove <&> \case
2020-05-08 20:38:41 +04:00
[] -> ""
(,) _ ParseTree { ptSource } : _ -> ptSource
2020-04-30 21:06:01 +04:00
return Expected
{ eMsg = msg
, eWhole = src
, eInfo = i
2020-04-30 21:06:01 +04:00
}
2020-05-08 00:52:33 +04:00
-- | Pick next tree in a forest or die with msg.
2020-04-30 14:39:51 +04:00
takeNext :: Text -> Parser ParseTree
takeNext msg = do
gets' pfGrove >>= \case
2020-04-30 14:39:51 +04:00
[] -> die msg
(_, t) : f -> do
2020-05-08 21:30:19 +04:00
if "comment" `Text.isSuffixOf` ptName t
then do
mod' (ptSource t :)
2020-05-08 21:30:19 +04:00
takeNext msg
else do
mod' \st -> st
{ pfRange = diffRange (pfRange st) (ptRange t)
, pfGrove = f
}
2020-05-08 21:30:19 +04:00
return t
2020-04-30 14:39:51 +04:00
2020-05-19 21:26:57 +04:00
--fields :: Text -> Parser a -> Parser [a]
--fields name parser = do
-- (fs, rest) <- gets $ splitForest name . fst
-- res <- for fs \f -> do
-- put f
-- parser
--
-- put rest
-- return res
--
--splitForest :: Text -> ParseForest -> [ParseForest]
--splitForest name = go . pfGrove
-- where
-- go [] acc fs = (fs, acc)
-- go ((tName, tree) : other) acc fs =
-- if tName == name
-- then go other [] (reverse (tree : acc) : fs)
-- else go other (tree : acc) fs
2020-05-08 00:52:33 +04:00
-- | Pick a tree with that /field name/ or die with name as msg.
--
-- Will erase all subtrees with different names on the path!
--
2020-04-30 17:46:39 +04:00
field :: Text -> Parser a -> Parser a
2020-04-30 14:39:51 +04:00
field name parser = do
gets' pfGrove >>= \case
2020-04-30 14:39:51 +04:00
(name', t) : _
| name == name' -> do
sandbox True t
grove -> do
2020-04-30 14:39:51 +04:00
case lookup name grove of
Just tree -> sandbox False tree
Nothing -> die name
where
sandbox firstOne tree@ParseTree {ptID, ptRange} = do
st@Forest {pfGrove = grove, pfRange = rng} <- get'
2020-05-08 21:30:19 +04:00
let (errs, new_comments, grove') = delete name grove
mod' (++ new_comments)
put' Forest
{ pfID = ptID
, pfGrove = [(name, tree)]
, pfRange = ptRange
}
2020-04-30 14:39:51 +04:00
2020-05-08 20:38:41 +04:00
res <- parser
put' st
{ pfGrove = grove'
, pfRange = if firstOne then diffRange rng ptRange else rng
}
put' @[Text] []
2020-04-30 14:39:51 +04:00
for_ errs (tell . pure . unexpected)
2020-05-08 20:38:41 +04:00
return res
2020-06-04 17:40:38 +04:00
fallback :: Stubbed a ASTInfo => Text -> Parser a
fallback msg = pure . stub =<< makeError msg
-- | Produce "expected ${X}" error at this point.
die :: Text -> Parser a
die msg = throwM =<< makeError msg
2020-06-04 17:40:38 +04:00
die' ::Text -> ASTInfo -> Parser a
die' msg rng = throwM =<< makeError' msg rng
2020-04-30 14:39:51 +04:00
2020-06-01 22:02:16 +04:00
-- | When tree-sitter found something it was unable to process.
unexpected :: ParseTree -> Error ASTInfo
unexpected ParseTree { ptSource, ptRange } =
2020-06-09 15:56:11 +04:00
Expected "not that" ptSource (Cons ptRange $ Cons [] Nil)
2020-05-08 00:52:33 +04:00
-- | If a parser fails, return stub with error originating here.
stubbed :: Stubbed a ASTInfo => Text -> Parser a -> Parser a
2020-04-30 14:39:51 +04:00
stubbed msg parser = do
parser <|> fallback msg
2020-05-08 00:52:33 +04:00
-- | The forest must start with tree of that name. Its subtrees become new
-- forest. Otherwise, it dies with name as msg.
2020-04-30 14:39:51 +04:00
subtree :: Text -> Parser a -> Parser a
subtree msg parser = do
ParseTree {ptChildren, ptName} <- takeNext msg
if ptName == msg
then do
save <- get' @ParseForest
put' ptChildren
rest <- gets' pfGrove
collectErrors rest
parser <* put' save
2020-04-30 14:39:51 +04:00
else do
die msg
2020-05-08 00:52:33 +04:00
-- | Because `ExceptT` requires error to be `Monoid` for `Alternative`.
2020-04-30 14:39:51 +04:00
(<|>) :: Parser a -> Parser a -> Parser a
l <|> r = do
s <- get' @ParseForest
c <- get' @[Text]
l `catch` \(e :: Error ASTInfo) -> do
put' s
put' c
r
2020-04-30 14:39:51 +04:00
2020-06-01 22:02:16 +04:00
-- | Custom @foldl1 (<|>)@.
2020-04-30 14:39:51 +04:00
select :: [Parser a] -> Parser a
select = foldl1 (<|>)
2020-06-01 22:02:16 +04:00
-- | Custom @optionMaybe@.
2020-04-30 14:39:51 +04:00
optional :: Parser a -> Parser (Maybe a)
optional p = fmap Just p <|> return Nothing
2020-05-08 00:52:33 +04:00
-- | Custom `Alternative.many`.
--
2020-05-09 12:17:37 +04:00
-- TODO: remove, replace with `fields` combinator.
--
many :: Parser a -> Parser [a]
many p = many'
2020-04-30 14:39:51 +04:00
where
many' = some' <|> pure []
2020-04-30 17:46:39 +04:00
some' = do
x <- p
xs <- many'
return (x : xs)
2020-04-30 14:39:51 +04:00
2020-05-08 00:52:33 +04:00
-- | Custom `Alternative.some`.
--
some :: Parser a -> Parser [a]
some p = some'
2020-04-30 14:39:51 +04:00
where
many' = some' <|> pure []
2020-04-30 17:46:39 +04:00
some' = do
x <- p
xs <- many'
return (x : xs)
2020-05-08 00:52:33 +04:00
-- | Run parser on given file and pretty-print stuff.
2020-05-08 21:30:19 +04:00
--
debugParser :: Show a => Parser a -> FilePath -> IO ()
debugParser parser fin = do
(res, errs) <- runParser parser fin
putStrLn "Result:"
print res
MTL.unless (null errs) do
putStrLn ""
putStrLn "Errors:"
for_ errs (print . nest 2 . pp)
2020-06-01 22:02:16 +04:00
-- | Consume next tree if it has the given name. Or die.
2020-04-30 14:39:51 +04:00
token :: Text -> Parser Text
token node = do
i <- getInfo
2020-06-04 17:16:04 +04:00
ParseTree {ptName, ptSource} <- takeNext node
2020-04-30 14:39:51 +04:00
if ptName == node
2020-05-08 20:38:41 +04:00
then return ptSource
else die' node i
2020-04-30 14:39:51 +04:00
2020-05-08 00:52:33 +04:00
-- | Consume next tree, return its textual representation.
2020-04-30 14:39:51 +04:00
anything :: Parser Text
anything = do
tree <- takeNext "anything"
2020-05-08 20:38:41 +04:00
return $ ptSource tree
2020-04-30 14:39:51 +04:00
2020-06-01 22:02:16 +04:00
-- | Get range of the current tree (or forest) before the parser was run.
2020-04-30 14:39:51 +04:00
range :: Parser a -> Parser (a, Range)
range parser =
get' >>= \case
Forest {pfGrove = [(,) _ ParseTree {ptRange}]} -> do
2020-04-30 14:39:51 +04:00
a <- parser
return (a, ptRange)
Forest {pfRange} -> do
2020-04-30 14:39:51 +04:00
a <- parser
return (a, pfRange)
2020-05-08 00:52:33 +04:00
-- | Get current range.
2020-06-01 18:17:33 +04:00
currentRange :: Parser Range
currentRange = snd <$> range (return ())
2020-04-30 14:39:51 +04:00
2020-05-08 00:52:33 +04:00
-- | Remove all keys until given key is found; remove the latter as well.
--
2020-05-08 21:30:19 +04:00
-- Also returns all ERROR-nodes.
--
-- TODO: rename.
--
2020-05-08 00:52:33 +04:00
-- Notice: this works differently from `Prelude.remove`!
--
2020-05-08 21:30:19 +04:00
delete :: Text -> [(Text, ParseTree)] -> ([ParseTree], [Text], [(Text, ParseTree)])
delete _ [] = ([], [], [])
2020-04-30 14:39:51 +04:00
delete k ((k', v) : rest) =
if k == k'
2020-05-08 21:30:19 +04:00
then (addIfError v [], addIfComment v [], rest)
else (addIfError v vs, addIfComment v cs, remains)
where
2020-05-08 21:30:19 +04:00
(vs, cs, remains) = delete k rest
2020-06-04 17:16:04 +04:00
addIfError v' =
if ptName v' == "ERROR"
then (:) v'
2020-05-08 21:30:19 +04:00
else id
2020-06-04 17:16:04 +04:00
addIfComment v' =
if "comment" `Text.isSuffixOf` ptName v'
then (ptSource v' :)
2020-05-08 21:30:19 +04:00
else id
-- | Report all ERRORs from the list.
collectErrors :: [(Text, ParseTree)] -> Parser ()
collectErrors vs =
for_ vs \(_, v) -> do
MTL.when (ptName v == "ERROR") do
tell [unexpected v]
2020-04-30 14:39:51 +04:00
2020-05-08 00:52:33 +04:00
-- | Universal accessor.
--
-- Usage:
--
-- > inside "$field:$treename"
-- > inside "$field"
-- > inside ":$treename" -- don't, use "subtree"
--
inside :: Stubbed a ASTInfo => Text -> Parser a -> Parser a
inside sig parser = do
let (f, st') = Text.breakOn ":" sig
let st = Text.drop 1 st'
if Text.null f
then do
2020-05-08 00:52:33 +04:00
-- The order is important.
subtree st do
stubbed f do
parser
else do
field f do
stubbed f do
if Text.null st
then do
parser
else do
subtree st do
parser
2020-05-08 00:52:33 +04:00
-- | Equip given constructor with info.
2020-06-01 18:17:33 +04:00
getInfo :: Parser ASTInfo
2020-06-09 15:56:11 +04:00
getInfo = Cons <$> currentRange <*> do Cons <$> grabComments <*> pure Nil
2020-05-08 21:30:19 +04:00
2020-06-01 22:02:16 +04:00
-- | Take the accumulated comments, clean the accumulator.
2020-05-08 21:30:19 +04:00
grabComments :: Parser [Text]
grabComments = do
comms <- get'
mod' @[Text] $ const []
2020-05-08 21:30:19 +04:00
return comms
2020-05-08 00:52:33 +04:00
-- | /Actual/ debug pring.
dump :: Parser ()
dump = gets' pfGrove >>= traceShowM