]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Read.hs
XML: use symantic-xml
[doclang.git] / Hdoc / TCT / Read.hs
1 module Hdoc.TCT.Read
2 ( module Hdoc.TCT.Read.Cell
3 , module Hdoc.TCT.Read.Elem
4 , module Hdoc.TCT.Read.Token
5 , module Hdoc.TCT.Read.Tree
6 , module Hdoc.TCT.Read
7 ) where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Arrow (left)
11 import Control.Monad (Monad(..), join, (=<<))
12 import Data.Bool
13 import Data.Either (Either(..))
14 import Data.Eq (Eq(..))
15 import Data.Foldable (Foldable(..), any)
16 import Data.Function (($), (.))
17 import Data.Functor ((<$>))
18 import Data.List.NonEmpty (NonEmpty(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Traversable (Traversable(..))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Data.Void (Void)
23 import System.FilePath ((</>), takeDirectory)
24 import System.IO (FilePath, IO)
25 import Text.Show (Show(..), showString)
26 import qualified Control.Monad.Trans.Reader as R
27 import qualified Data.ByteString.Lazy as BSL
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Encoding as TL
30 import qualified System.Directory as Directory
31 import qualified System.FilePath as FilePath
32 import qualified System.IO.Error as IO
33 import qualified Text.Megaparsec as P
34
35 import Hdoc.TCT.Debug
36 import Hdoc.TCT.Tree
37 import Hdoc.TCT.Cell
38 import Hdoc.TCT.Read.Cell
39 import Hdoc.TCT.Read.Elem
40 import Hdoc.TCT.Read.Tree
41 import Hdoc.TCT.Read.Token
42
43 -- | Parsing is done in two phases:
44 --
45 -- 1. Indentation-sensitive parsing on 'TL.Text'.
46 -- 2. 'Pair'-sensitive parsing on some 'NodeText's resulting of 1. .
47 readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
48 readTCT inp = do
49 inpFileCanon <-
50 Directory.makeRelativeToCurrentDirectory =<<
51 Directory.canonicalizePath inp
52 goFile (NodeHeader HeaderDash) $
53 FileRange inpFileCanon pos1 pos1:|[]
54 where
55 goFile :: Node -> NonEmpty FileRange -> IO (Either ErrorRead (Trees (Cell Node)))
56 goFile parentNode loc@(FileRange{fileRange_file=inpFile}:|inpPath)
57 | any (\FileRange{fileRange_file} -> fileRange_file == inpFile) inpPath =
58 return $ Left $ ErrorReadIncludeLoop loc
59 | otherwise = do
60 readFile inpFile >>= \case
61 Left err -> return $ Left $ ErrorReadIO loc err
62 Right inpText ->
63 case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
64 Left err -> return $ Left $ ErrorReadParser err
65 Right trees ->
66 (join <$>) . sequence <$>
67 traverse
68 (goTree parentNode)
69 trees
70 -- DEBUG: (debug0 "readTCT" trees)
71 goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
72 goTree parNode t@(Tree c@(Sourced ss@(FileRange{fileRange_file}:|_sn) nod) ts) =
73 case nod of
74 NodeLower{} -> return $ Right $ pure t
75 -- NOTE: preserve NodeText ""
76 NodeText n | TL.null n -> return $ Right $ pure t
77 NodeText n ->
78 case parNode of
79 NodeHeader HeaderBar{} -> return $ Right $ pure t
80 NodeHeader HeaderEqual{} -> return $ Right $ pure t
81 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
82 _ ->
83 return $ left ErrorReadParser $
84 parseTokens <$> parseLexemes (Sourced ss n)
85 NodeHeader (HeaderDotSlash incFile) -> do
86 incFileCanon <-
87 Directory.makeRelativeToCurrentDirectory =<<
88 Directory.canonicalizePath
89 (FilePath.takeDirectory fileRange_file </> incFile)
90 ((pure . Tree c <$>) <$>) $
91 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
92 -- to merge nodes accross files, when writing XML
93 goFile parNode $ FileRange incFileCanon pos1 pos1 :| toList ss
94 _ ->
95 (pure . Tree c . join <$>) .
96 sequence <$> traverse (goTree nod') ts
97 where
98 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
99 nod' = case nod of
100 NodePara -> parNode
101 _ -> nod
102
103 readFile :: FilePath -> IO (Either IO.IOError TL.Text)
104 readFile fp =
105 (Right . TL.decodeUtf8 <$> BSL.readFile fp)
106 `IO.catchIOError` \e ->
107 if IO.isAlreadyInUseError e
108 || IO.isDoesNotExistError e
109 || IO.isPermissionError e
110 then return $ Left e
111 else IO.ioError e
112
113 -- | Useful when producing only an exact source file rendition.
114 readTCTWithoutIncludes ::
115 FilePath -> TL.Text ->
116 Either ErrorRead (Trees (Cell Node))
117 readTCTWithoutIncludes inp txt = do
118 trees <-
119 left ErrorReadParser $
120 (`R.runReader` []) $
121 P.runParserT (p_Trees <* P.eof) inp txt
122 join <$> traverse (go $ NodeHeader HeaderDash) trees
123 -- (debug0 "readTCTWithoutIncludes" trees)
124 where
125 go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
126 go parent t@(Tree c@(Sourced ssn nod) ts) =
127 case nod of
128 NodeLower{} -> Right $ pure t
129 -- NOTE: preserve NodeText ""
130 NodeText n | TL.null n -> Right $ pure t
131 NodeText n ->
132 case parent of
133 NodeHeader HeaderBar{} -> Right $ pure t
134 NodeHeader HeaderEqual{} -> Right $ pure t
135 NodeHeader HeaderDashDash{} -> Right $ pure t
136 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Sourced ssn n)
137 _ -> pure . Tree c . join <$> traverse (go nod') ts
138 where
139 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
140 nod' = case nod of
141 NodePara -> parent
142 _ -> nod
143
144 dependencies :: Trees (Cell Node) -> [FilePath]
145 dependencies = foldr (go "") []
146 where
147 go :: FilePath -> Tree (Cell Node) -> [FilePath] -> [FilePath]
148 go dir (Tree (Sourced _ss n) ts) acc =
149 case n of
150 NodeHeader (HeaderDotSlash file) ->
151 (dir</>file) :
152 foldr (go (dir</>takeDirectory file)) acc ts
153 _ ->
154 foldr (go dir) acc ts
155
156 -- * Type 'ErrorRead'
157 data ErrorRead
158 = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
159 | ErrorReadIncludeLoop Location
160 | ErrorReadIO Location IO.IOError
161 deriving (Eq)
162 instance Show ErrorRead where
163 showsPrec _p = \case
164 ErrorReadParser e ->
165 showString (P.parseErrorPretty e)
166 ErrorReadIncludeLoop (FileRange{..}:|loc) ->
167 showString "ErrorReadIncludeLoop" .
168 showString "\n " . showString fileRange_file .
169 showString (foldMap (\s -> "\n included by "<>show s) loc)
170 ErrorReadIO (_:|loc) err ->
171 showString "ErrorReadIO" .
172 showString "\n " . showsPrec 10 err .
173 showString (foldMap (\s -> "\n in "<>show s) loc)