]> Git — Sourcephile - doclang.git/blob - src/Textphile/TCT/Read.hs
Rename {hdoc => textphile}
[doclang.git] / src / Textphile / TCT / Read.hs
1 module Textphile.TCT.Read
2 ( module Textphile.TCT.Read.Cell
3 , module Textphile.TCT.Read.Elem
4 , module Textphile.TCT.Read.Token
5 , module Textphile.TCT.Read.Tree
6 , module Textphile.TCT.Read
7 ) where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Arrow (left)
11 import Control.Monad (Monad(..), join, (=<<))
12 import Data.Bool
13 import Data.Default.Class (Default(..))
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Foldable (Foldable(..), any)
17 import Data.Function (($), (.))
18 import Data.Functor ((<$>))
19 import Data.List.NonEmpty (NonEmpty(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Traversable (Traversable(..))
22 import Data.TreeSeq.Strict (Tree(..), Trees)
23 import Data.Void (Void)
24 import System.FilePath ((</>), takeDirectory)
25 import System.IO (FilePath, IO)
26 import Text.Show (Show(..), showString)
27 import qualified Control.Monad.Trans.Reader as R
28 import qualified Data.ByteString.Lazy as BSL
29 import qualified Data.Text.Lazy as TL
30 import qualified Data.Text.Lazy.Encoding as TL
31 import qualified System.Directory as Directory
32 import qualified System.FilePath as FilePath
33 import qualified System.IO.Error as IO
34 import qualified Text.Megaparsec as P
35
36 import Textphile.TCT.Debug
37 import Textphile.TCT.Tree
38 import Textphile.TCT.Cell
39 import Textphile.TCT.Read.Cell
40 import Textphile.TCT.Read.Elem
41 import Textphile.TCT.Read.Tree
42 import Textphile.TCT.Read.Token
43
44 -- | Parsing is done in two phases:
45 --
46 -- 1. Indentation-sensitive parsing on 'TL.Text'.
47 -- 2. 'Pair'-sensitive parsing on some 'NodeText's resulting of 1. .
48 readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
49 readTCT inp = do
50 inpFileCanon <-
51 Directory.makeRelativeToCurrentDirectory =<<
52 Directory.canonicalizePath inp
53 goFile (NodeHeader HeaderDash) $
54 FileRange inpFileCanon def def:|[]
55 where
56 goFile :: Node -> NonEmpty (FileRange LineColumn) -> IO (Either ErrorRead (Trees (Cell Node)))
57 goFile parentNode loc@(FileRange{fileRange_file=inpFile}:|inpPath)
58 | any (\FileRange{fileRange_file} -> fileRange_file == inpFile) inpPath =
59 return $ Left $ ErrorReadIncludeLoop loc
60 | otherwise = do
61 readFile inpFile >>= \case
62 Left err -> return $ Left $ ErrorReadIO loc err
63 Right inpText ->
64 case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
65 Left err -> return $ Left $ ErrorReadParser err
66 Right trees ->
67 (join <$>) . sequence <$>
68 traverse
69 (goTree parentNode)
70 trees
71 -- DEBUG: (debug0 "readTCT" trees)
72 goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
73 goTree parNode t@(Tree c@(Sourced ss@(FileRange{fileRange_file}:|_sn) nod) ts) =
74 case nod of
75 NodeLower{} -> return $ Right $ pure t
76 -- NOTE: preserve NodeText ""
77 NodeText n | TL.null n -> return $ Right $ pure t
78 NodeText n ->
79 case parNode of
80 NodeHeader HeaderBar{} -> return $ Right $ pure t
81 NodeHeader HeaderEqual{} -> return $ Right $ pure t
82 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
83 _ ->
84 return $ left ErrorReadParserCell $
85 parseTokens <$> parseLexemes (Sourced ss n)
86 NodeHeader (HeaderDotSlash incFile) -> do
87 incFileCanon <-
88 Directory.makeRelativeToCurrentDirectory =<<
89 Directory.canonicalizePath
90 (FilePath.takeDirectory fileRange_file </> incFile)
91 ((pure . Tree c <$>) <$>) $
92 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
93 -- to merge nodes accross files, when writing XML
94 goFile parNode $ FileRange incFileCanon def def :| toList ss
95 _ ->
96 (pure . Tree c . join <$>) .
97 sequence <$> traverse (goTree nod') ts
98 where
99 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
100 nod' = case nod of
101 NodePara -> parNode
102 _ -> nod
103
104 readFile :: FilePath -> IO (Either IO.IOError TL.Text)
105 readFile fp =
106 (Right . TL.decodeUtf8 <$> BSL.readFile fp)
107 `IO.catchIOError` \e ->
108 if IO.isAlreadyInUseError e
109 || IO.isDoesNotExistError e
110 || IO.isPermissionError e
111 then return $ Left e
112 else IO.ioError e
113
114 -- | Useful when producing only an exact source file rendition.
115 readTCTWithoutIncludes ::
116 FilePath -> TL.Text ->
117 Either ErrorRead (Trees (Cell Node))
118 readTCTWithoutIncludes inp txt = do
119 trees <-
120 left ErrorReadParser $
121 (`R.runReader` []) $
122 P.runParserT (p_Trees <* P.eof) inp txt
123 join <$> traverse (go $ NodeHeader HeaderDash) trees
124 -- (debug0 "readTCTWithoutIncludes" trees)
125 where
126 go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
127 go parent t@(Tree c@(Sourced ssn nod) ts) =
128 case nod of
129 NodeLower{} -> Right $ pure t
130 -- NOTE: preserve NodeText ""
131 NodeText n | TL.null n -> Right $ pure t
132 NodeText n ->
133 case parent of
134 NodeHeader HeaderBar{} -> Right $ pure t
135 NodeHeader HeaderEqual{} -> Right $ pure t
136 NodeHeader HeaderDashDash{} -> Right $ pure t
137 _ -> left ErrorReadParserCell $ parseTokens <$> parseLexemes (Sourced ssn n)
138 _ -> pure . Tree c . join <$> traverse (go nod') ts
139 where
140 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
141 nod' = case nod of
142 NodePara -> parent
143 _ -> nod
144
145 dependencies :: Trees (Cell Node) -> [FilePath]
146 dependencies = foldr (go "") []
147 where
148 go :: FilePath -> Tree (Cell Node) -> [FilePath] -> [FilePath]
149 go dir (Tree (Sourced _ss n) ts) acc =
150 case n of
151 NodeHeader (HeaderDotSlash file) ->
152 (dir</>file) :
153 foldr (go (dir</>takeDirectory file)) acc ts
154 _ ->
155 foldr (go dir) acc ts
156
157 -- * Type 'ErrorRead'
158 data ErrorRead
159 = ErrorReadParser (P.ParseErrorBundle TL.Text Void)
160 | ErrorReadParserCell (P.ParseErrorBundle StreamCell Void)
161 | ErrorReadIncludeLoop Location
162 | ErrorReadIO Location IO.IOError
163 deriving (Eq)
164 instance Show ErrorRead where
165 showsPrec _p = \case
166 ErrorReadParser e ->
167 showString (P.errorBundlePretty e)
168 ErrorReadParserCell e ->
169 showString (P.errorBundlePretty e)
170 ErrorReadIncludeLoop (FileRange{..}:|loc) ->
171 showString "ErrorReadIncludeLoop" .
172 showString "\n " . showString fileRange_file .
173 showString (foldMap (\s -> "\n included by "<>show s) loc)
174 ErrorReadIO (_:|loc) err ->
175 showString "ErrorReadIO" .
176 showString "\n " . showsPrec 10 err .
177 showString (foldMap (\s -> "\n in "<>show s) loc)