2 ( module Hdoc.TCT.Read.Cell
3 , module Hdoc.TCT.Read.Elem
4 , module Hdoc.TCT.Read.Token
5 , module Hdoc.TCT.Read.Tree
9 import Control.Applicative (Applicative(..))
10 import Control.Arrow (left)
11 import Control.Monad (Monad(..), join, (=<<))
13 import Data.Either (Either(..))
14 import Data.Eq (Eq(..))
15 import Data.Foldable (Foldable(..), any)
16 import Data.Function (($), (.))
17 import Data.Functor ((<$>))
18 import Data.Functor.Compose (Compose(..))
19 import Data.List.NonEmpty (NonEmpty(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Traversable (Traversable(..))
22 import Data.TreeSeq.Strict (Tree(..), Trees)
23 import Data.Void (Void)
24 import System.FilePath ((</>), takeDirectory)
25 import System.IO (FilePath, IO)
26 import Text.Show (Show(..), showString)
27 import qualified Control.Monad.Trans.Reader as R
28 import qualified Data.ByteString.Lazy as BSL
29 import qualified Data.Text.Lazy as TL
30 import qualified Data.Text.Lazy.Encoding as TL
31 import qualified System.Directory as Directory
32 import qualified System.FilePath as FilePath
33 import qualified System.IO.Error as IO
34 import qualified Text.Megaparsec as P
39 import Hdoc.TCT.Read.Cell
40 import Hdoc.TCT.Read.Elem
41 import Hdoc.TCT.Read.Tree
42 import Hdoc.TCT.Read.Token
44 -- | Parsing is done in two phases:
46 -- 1. Indentation-sensitive parsing on 'TL.Text'.
47 -- 2. 'Pair'-sensitive parsing on some 'NodeText's resulting of 1. .
48 readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
51 Directory.makeRelativeToCurrentDirectory =<<
52 Directory.canonicalizePath inp
53 goFile (NodeHeader HeaderDash) $
54 Span inpFileCanon pos1 pos1:|[]
56 goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node)))
57 goFile parentNode loc@(Span{span_file=inpFile}:|inpPath)
58 | any (\Span{span_file} -> span_file == inpFile) inpPath =
59 return $ Left $ ErrorReadIncludeLoop loc
61 readFile inpFile >>= \case
62 Left err -> return $ Left $ ErrorReadIO loc err
64 case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
65 Left err -> return $ Left $ ErrorReadParser err
67 (join <$>) . sequence <$>
71 -- DEBUG: (debug0 "readTCT" trees)
72 goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
73 goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) =
75 NodeLower{} -> return $ Right $ pure t
76 -- NOTE: preserve NodeText ""
77 NodeText n | TL.null n -> return $ Right $ pure t
80 NodeHeader HeaderBar{} -> return $ Right $ pure t
81 NodeHeader HeaderEqual{} -> return $ Right $ pure t
82 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
84 return $ left ErrorReadParser $
85 parseTokens <$> parseLexemes (Cell ss n)
86 NodeHeader (HeaderDotSlash incFile) -> do
88 Directory.makeRelativeToCurrentDirectory =<<
89 Directory.canonicalizePath
90 (FilePath.takeDirectory span_file </> incFile)
91 ((pure . Tree c <$>) <$>) $
92 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
93 -- to merge nodes accross files, when writing XML
94 goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss
96 (pure . Tree c . join <$>) .
97 sequence <$> traverse (goTree nod') ts
99 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
104 readFile :: FilePath -> IO (Either IO.IOError TL.Text)
106 (Right . TL.decodeUtf8 <$> BSL.readFile fp)
107 `IO.catchIOError` \e ->
108 if IO.isAlreadyInUseError e
109 || IO.isDoesNotExistError e
110 || IO.isPermissionError e
114 -- | Useful when producing only an exact source file rendition.
115 readTCTWithoutIncludes ::
116 FilePath -> TL.Text ->
117 Either ErrorRead (Trees (Cell Node))
118 readTCTWithoutIncludes inp txt = do
120 left ErrorReadParser $
122 P.runParserT (p_Trees <* P.eof) inp txt
123 join <$> traverse (go $ NodeHeader HeaderDash) trees
124 -- (debug0 "readTCTWithoutIncludes" trees)
126 go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
127 go parent t@(Tree c@(Cell ssn nod) ts) =
129 NodeLower{} -> Right $ pure t
130 -- NOTE: preserve NodeText ""
131 NodeText n | TL.null n -> Right $ pure t
134 NodeHeader HeaderBar{} -> Right $ pure t
135 NodeHeader HeaderEqual{} -> Right $ pure t
136 NodeHeader HeaderDashDash{} -> Right $ pure t
137 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n)
138 _ -> pure . Tree c . join <$> traverse (go nod') ts
140 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
145 dependencies :: Trees (Cell Node) -> [FilePath]
146 dependencies = foldr (go "") []
148 go :: FilePath -> Tree (Cell Node) -> [FilePath] -> [FilePath]
149 go dir (Tree (Cell _ss n) ts) acc =
151 NodeHeader (HeaderDotSlash file) ->
153 foldr (go (dir</>takeDirectory file)) acc ts
155 foldr (go dir) acc ts
157 -- * Type 'ErrorRead'
159 = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
160 | ErrorReadIncludeLoop Location
161 | ErrorReadIO Location IO.IOError
163 instance Show ErrorRead where
166 showString (P.parseErrorPretty e)
167 ErrorReadIncludeLoop (Span{..}:|loc) ->
168 showString "ErrorReadIncludeLoop" .
169 showString "\n " . showString span_file .
170 showString (foldMap (\s -> "\n included by "<>show s) loc)
171 ErrorReadIO (_:|loc) err ->
172 showString "ErrorReadIO" .
173 showString "\n " . showsPrec 10 err .
174 showString (foldMap (\s -> "\n in "<>show s) loc)