Simplifying parser

This commit is contained in:
Kirill Andreev 2020-05-08 20:38:41 +04:00
parent 7ff8226e1b
commit 916d9897bd
No known key found for this signature in database
GPG Key ID: CF7DA79DE4785A47
3 changed files with 49 additions and 70 deletions

View File

@ -614,13 +614,13 @@ paramDecl :: Parser (VarDecl ASTInfo)
paramDecl = do paramDecl = do
subtree "param_decl" do subtree "param_decl" do
ctor Decl ctor Decl
<*> do inside ":access" do <*> inside "access" do
select ctor access' <*> anything
[ ctor Mutable <* consumeOrDie "var" <*> inside "name" name
, ctor Immutable <* consumeOrDie "const" <*> inside "type" type_
] where
<*> inside "name" name access' r "var" = Mutable r
<*> inside "type" type_ access' r "const" = Immutable r
newtype_ = select newtype_ = select
[ record_type [ record_type
@ -716,8 +716,8 @@ typeTuple = do
-- example = "../../../src/test/contracts/bytes_unpack.ligo" -- example = "../../../src/test/contracts/bytes_unpack.ligo"
-- example = "../../../src/test/contracts/chain_id.ligo" -- example = "../../../src/test/contracts/chain_id.ligo"
-- example = "../../../src/test/contracts/coase.ligo" -- example = "../../../src/test/contracts/coase.ligo"
example = "../../../src/test/contracts/failwith.ligo" -- example = "../../../src/test/contracts/failwith.ligo"
-- example = "../../../src/test/contracts/application.ligo" example = "../../../src/test/contracts/loop.ligo"
-- example = "../../../src/test/contracts/application.ligo" -- example = "../../../src/test/contracts/application.ligo"
-- example = "../../../src/test/contracts/application.ligo" -- example = "../../../src/test/contracts/application.ligo"
-- example = "../../../src/test/contracts/application.ligo" -- example = "../../../src/test/contracts/application.ligo"

View File

@ -1,13 +1,16 @@
{-# language Strict #-} {-# language StrictData #-}
module ParseTree where module ParseTree where
import Data.IORef import Data.IORef
import qualified Data.Text as Text import qualified Data.Text as Text
import Data.Text (Text)
import Data.Traversable (for) import Data.Traversable (for)
import Data.Text.Encoding
import Data.Text.Foreign (withCStringLen) import Data.Text.Foreign (withCStringLen)
import Data.Text.IO as IO import qualified Data.ByteString as BS
import Data.ByteString (ByteString)
import TreeSitter.Parser import TreeSitter.Parser
import TreeSitter.Tree import TreeSitter.Tree
@ -42,22 +45,23 @@ getNodeTypesPath = getDataFileName "../pascaligo/src/node-types.json"
-- | The tree tree-sitter produces. -- | The tree tree-sitter produces.
data ParseTree = ParseTree data ParseTree = ParseTree
{ ptID :: Int -- ^ Unique number, for fast comparison. { ptID :: Int -- ^ Unique number, for fast comparison.
, ptName :: Text.Text -- ^ Name of the node. , ptName :: Text -- ^ Name of the node.
, ptRange :: Range -- ^ Range of the node. , ptRange :: Range -- ^ Range of the node.
, ptSource :: ~Text -- ^ Range of the node.
, ptChildren :: ParseForest -- ^ Subtrees. , ptChildren :: ParseForest -- ^ Subtrees.
} }
deriving (Show) via PP ParseTree deriving (Show) via PP ParseTree
-- ^ The forest we work with. -- ^ The forest we work with.
data ParseForest = Forest data ParseForest = Forest
{ pfID :: Int -- ^ Unique number for comparison. { pfID :: Int -- ^ Unique number for comparison.
, pfGrove :: [(Text.Text, ParseTree)] -- ^ Subtrees. , pfGrove :: [(Text, ParseTree)] -- ^ Subtrees.
, pfRange :: Range -- ^ Full range of the forest. , pfRange :: Range -- ^ Full range of the forest.
} }
deriving (Show) via PP ParseForest deriving (Show) via PP ParseForest
instance Pretty ParseTree where instance Pretty ParseTree where
pp (ParseTree _ n r forest) = pp (ParseTree _ n r forest _) =
parens parens
( hang ( hang
(quotes (text (Text.unpack n)) <+> pp r) (quotes (text (Text.unpack n)) <+> pp r)
@ -73,19 +77,27 @@ instance Pretty ParseForest where
then nest 2 $ pp tree then nest 2 $ pp tree
else hang (text (Text.unpack field) <> ": ") 2 (pp tree) else hang (text (Text.unpack field) <> ": ") 2 (pp tree)
-- | Extract textual representation of given range.
cutOut :: Range -> ByteString -> Text
cutOut (Range (_, _, s) (_, _, f)) bs =
decodeUtf8
$ BS.take (f - s)
$ BS.drop s
bs
-- | Feed file contents into PascaLIGO grammar recogniser. -- | Feed file contents into PascaLIGO grammar recogniser.
toParseTree :: FilePath -> IO ParseForest toParseTree :: FilePath -> IO ParseForest
toParseTree fin = do toParseTree fin = do
parser <- ts_parser_new parser <- ts_parser_new
True <- ts_parser_set_language parser tree_sitter_PascaLigo True <- ts_parser_set_language parser tree_sitter_PascaLigo
src <- IO.readFile fin src <- BS.readFile fin
idCounter <- newIORef 0 idCounter <- newIORef 0
withCStringLen src \(str, len) -> do BS.useAsCStringLen src \(str, len) -> do
tree <- ts_parser_parse_string parser nullPtr str len tree <- ts_parser_parse_string parser nullPtr str len
finalTree <- withRootNode tree (peek >=> go idCounter) finalTree <- withRootNode tree (peek >=> go src idCounter)
return $ Forest 0 [("", finalTree)] (ptRange finalTree) return $ Forest 0 [("", finalTree)] (ptRange finalTree)
where where
@ -94,8 +106,8 @@ toParseTree fin = do
modifyIORef' ref (+ 1) modifyIORef' ref (+ 1)
readIORef ref readIORef ref
go :: IORef Int -> Node -> IO ParseTree go :: ByteString -> IORef Int -> Node -> IO ParseTree
go idCounter node = do go src idCounter node = do
let count = fromIntegral $ nodeChildCount node let count = fromIntegral $ nodeChildCount node
allocaArray count \children -> do allocaArray count \children -> do
alloca \tsNodePtr -> do alloca \tsNodePtr -> do
@ -105,7 +117,7 @@ toParseTree fin = do
peekElemOff children i peekElemOff children i
trees <- for nodes \node' -> do trees <- for nodes \node' -> do
tree <- go idCounter node' tree <- go src idCounter node'
field <- field <-
if nodeFieldName node' == nullPtr if nodeFieldName node' == nullPtr
then return "" then return ""
@ -142,4 +154,5 @@ toParseTree fin = do
, ptName = Text.pack ty , ptName = Text.pack ty
, ptRange = range , ptRange = range
, ptChildren = Forest fID trees range , ptChildren = Forest fID trees range
, ptSource = cutOut range src
} }

View File

@ -46,7 +46,7 @@ import Control.Monad.Except
import Control.Monad.Identity import Control.Monad.Identity
import Data.Foldable import Data.Foldable
import Data.Text.Encoding import Data.Functor
import Data.Text (Text, pack, unpack) import Data.Text (Text, pack, unpack)
import qualified Data.Text as Text import qualified Data.Text as Text
@ -75,10 +75,9 @@ instance Pretty Error where
newtype Parser a = Parser newtype Parser a = Parser
{ unParser { unParser
:: WriterT [Error] -- Early I though to report errors that way. :: WriterT [Error] -- Early I though to report errors that way.
( ReaderT ParserEnv -- Range/Something.
( StateT ParseForest -- Current forest to recognise. ( StateT ParseForest -- Current forest to recognise.
( ExceptT Error -- Backtracking. Change `Error` to `()`? ( ExceptT Error -- Backtracking. Change `Error` to `()`?
( Identity )))) -- I forgot why. `#include`? Debug via `print`? ( Identity ))) -- I forgot why. `#include`? Debug via `print`?
a a
} }
deriving newtype deriving newtype
@ -87,7 +86,6 @@ newtype Parser a = Parser
, Monad , Monad
, MonadState ParseForest , MonadState ParseForest
, MonadWriter [Error] , MonadWriter [Error]
, MonadReader ParserEnv
, MonadError Error , MonadError Error
) )
@ -101,7 +99,9 @@ makeError msg = do
makeError' :: Text -> Range -> Parser Error makeError' :: Text -> Range -> Parser Error
makeError' msg rng = do makeError' msg rng = do
rng <- getRange rng <- getRange
src <- cutOut rng src <- gets pfGrove <&> \case
[] -> ""
(,) _ ParseTree { ptSource } : _ -> ptSource
return Expected return Expected
{ eMsg = msg { eMsg = msg
, eWhole = src , eWhole = src
@ -148,11 +148,15 @@ field name parser = do
, pfRange = ptRange , pfRange = ptRange
} }
parser <* put st res <- parser
put st
{ pfGrove = grove' { pfGrove = grove'
, pfRange = if firstOne then diffRange rng ptRange else rng , pfRange = if firstOne then diffRange rng ptRange else rng
} }
return res
-- | Variuos error reports. -- | Variuos error reports.
fallback :: Stubbed a => Text -> Parser a fallback :: Stubbed a => Text -> Parser a
fallback' :: Stubbed a => Text -> Range -> Parser a fallback' :: Stubbed a => Text -> Range -> Parser a
@ -215,19 +219,6 @@ some p = some'
xs <- many' xs <- many'
return (x : xs) return (x : xs)
-- | The source of file being parsed. BS, because tree-sitter has offsets
-- in /bytes/.
data ParserEnv = ParserEnv
{ peSource :: ByteString
}
-- | Debug print via IO. Obsolete.
--
-- TODO: remove. Also, remove IO from Parser tf stack.
--
puts :: MonadIO m => Show a => a -> m ()
puts = liftIO . print
-- | Run parser on given file. -- | Run parser on given file.
-- --
-- TODO: invent /proper/ 'ERROR'-node collector. -- TODO: invent /proper/ 'ERROR'-node collector.
@ -235,13 +226,11 @@ puts = liftIO . print
runParser :: Parser a -> FilePath -> IO (a, [Error]) runParser :: Parser a -> FilePath -> IO (a, [Error])
runParser (Parser parser) fin = do runParser (Parser parser) fin = do
pforest <- toParseTree fin pforest <- toParseTree fin
text <- ByteString.readFile fin
let let
res = res =
runIdentity runIdentity
$ runExceptT $ runExceptT
$ flip runStateT pforest $ flip runStateT pforest
$ flip runReaderT (ParserEnv text)
$ runWriterT $ runWriterT
$ parser $ parser
@ -260,39 +249,16 @@ debugParser parser fin = do
-- | Consume next tree if it has give name. Or die. -- | Consume next tree if it has give name. Or die.
token :: Text -> Parser Text token :: Text -> Parser Text
token node = do token node = do
tree@ParseTree {ptName, ptRange} <- takeNext node tree@ParseTree {ptName, ptRange, ptSource} <- takeNext node
if ptName == node if ptName == node
then do then return ptSource
cutOut ptRange else die' node ptRange
else do
die' node ptRange
-- | Consume next tree, return its textual representation. -- | Consume next tree, return its textual representation.
anything :: Parser Text anything :: Parser Text
anything = do anything = do
tree <- takeNext "anything" tree <- takeNext "anything"
cutOut $ ptRange tree return $ ptSource tree
-- | TODO: remove, b/c obsolete.
consume :: Text -> Parser ()
consume node = do
ParseTree {ptName, ptRange} <- takeNext node
when (ptName /= node) do
complain node ptRange
-- | TODO: remove, its literally is `void . token`.
consumeOrDie :: Text -> Parser ()
consumeOrDie node = do
ParseTree {ptName, ptRange} <- takeNext node
when (ptName /= node) do
die' node ptRange
-- | Extract textual representation of given range.
cutOut :: Range -> Parser Text
cutOut (Range (_, _, s) (_, _, f)) = do
bs <- asks peSource
return $ decodeUtf8 $ ByteString.take (f - s) (ByteString.drop s bs)
-- | Get range of current tree or forest before the parser was run. -- | Get range of current tree or forest before the parser was run.
range :: Parser a -> Parser (a, Range) range :: Parser a -> Parser (a, Range)