1 module Language.TCT.Read
2 ( module Language.TCT.Read.Cell
3 , module Language.TCT.Read.Elem
4 , module Language.TCT.Read.Token
5 , module Language.TCT.Read.Tree
6 , module Language.TCT.Read
9 import Control.Applicative (Applicative(..))
10 import Control.Arrow (left)
11 import Control.Monad (Monad(..), join, (=<<))
13 import Data.Either (Either(..))
14 import Data.Eq (Eq(..))
15 import Data.Foldable (Foldable(..), any)
16 import Data.Function (($), (.))
17 import Data.Functor ((<$>))
18 import Data.List.NonEmpty (NonEmpty(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Traversable (Traversable(..))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Data.Void (Void)
23 import System.FilePath ((</>))
24 import System.IO (FilePath, IO)
25 import Text.Show (Show(..), showString)
26 import qualified Control.Monad.Trans.Reader as R
27 import qualified Data.ByteString.Lazy as BSL
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Encoding as TL
30 import qualified System.Directory as Directory
31 import qualified System.FilePath as FilePath
32 import qualified Text.Megaparsec as P
34 import Language.TCT.Debug
35 import Language.TCT.Tree
36 import Language.TCT.Cell
37 import Language.TCT.Read.Cell
38 import Language.TCT.Read.Elem
39 import Language.TCT.Read.Tree
40 import Language.TCT.Read.Token
42 -- | Parsing is done in two phases:
44 -- 1. indentation-sensitive parsing on 'TL.Text'
45 -- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1.
46 readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
49 Directory.makeRelativeToCurrentDirectory =<<
50 Directory.canonicalizePath inp
51 goFile (NodeHeader HeaderDash) $
52 Span inpFileCanon pos1 pos1:|[]
54 goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node)))
55 goFile parentNode spans@(Span{span_file=inpFile}:|inpPath)
56 | any (\Span{span_file} -> span_file == inpFile) inpPath =
57 return $ Left $ ErrorReadIncludeLoop spans
59 inpText <- readFile inpFile
60 case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
61 Left err -> return $ Left $ ErrorReadParser err
63 (join <$>) . sequence <$>
66 (debug0 "readTCTWithIncludes" trees)
67 goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
68 goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) =
70 NodeLower{} -> return $ Right $ pure t
71 -- NOTE: preserve NodeText ""
72 NodeText n | TL.null n -> return $ Right $ pure t
75 NodeHeader HeaderBar{} -> return $ Right $ pure t
76 NodeHeader HeaderEqual{} -> return $ Right $ pure t
77 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
79 return $ left ErrorReadParser $
80 parseTokens <$> parseLexemes (Cell ss n)
81 NodeHeader (HeaderDotSlash incFile) -> do
83 Directory.makeRelativeToCurrentDirectory =<<
84 Directory.canonicalizePath
85 (FilePath.takeDirectory span_file </> incFile)
86 ((pure . Tree c <$>) <$>) $
87 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
88 -- to merge nodes accross files, when writing XML
89 goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss
91 (pure . Tree c . join <$>) .
92 sequence <$> traverse (goTree nod') ts
94 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
99 readFile :: FilePath -> IO TL.Text
100 readFile fp = TL.decodeUtf8 <$> BSL.readFile fp
102 -- | Useful when producing only an exact source file rendition.
103 readTCTWithoutIncludes ::
104 FilePath -> TL.Text ->
105 Either ErrorRead (Trees (Cell Node))
106 readTCTWithoutIncludes inp txt = do
108 left ErrorReadParser $
110 P.runParserT (p_Trees <* P.eof) inp txt
111 join <$> traverse (go $ NodeHeader HeaderDash)
112 (debug0 "readTCT" trs)
114 go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
115 go parent t@(Tree c@(Cell ssn nod) ts) =
117 NodeLower{} -> Right $ pure t
118 -- NOTE: preserve NodeText ""
119 NodeText n | TL.null n -> Right $ pure t
122 NodeHeader HeaderBar{} -> Right $ pure t
123 NodeHeader HeaderEqual{} -> Right $ pure t
124 NodeHeader HeaderDashDash{} -> Right $ pure t
125 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n)
126 _ -> pure . Tree c . join <$> traverse (go nod') ts
128 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
133 -- * Type 'ErrorRead'
135 = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
136 | ErrorReadIncludeLoop Spans
138 instance Show ErrorRead where
141 showString (P.parseErrorPretty e)
142 ErrorReadIncludeLoop (Span{..}:|spans) ->
143 showString "ErrorReadIncludeLoop" .
144 showString "\n " . showString span_file .
145 showString (foldMap (\s -> "\n included by "<>show s) spans)