]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read.hs
Add --output option.
[doclang.git] / Language / TCT / Read.hs
1 module Language.TCT.Read
2 ( module Language.TCT.Read.Cell
3 , module Language.TCT.Read.Elem
4 , module Language.TCT.Read.Token
5 , module Language.TCT.Read.Tree
6 , module Language.TCT.Read
7 ) where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Arrow (left)
11 import Control.Monad (Monad(..), join, (=<<))
12 import Data.Bool
13 import Data.Either (Either(..))
14 import Data.Eq (Eq(..))
15 import Data.Foldable (Foldable(..), any)
16 import Data.Function (($), (.))
17 import Data.Functor ((<$>))
18 import Data.List.NonEmpty (NonEmpty(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Traversable (Traversable(..))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Data.Void (Void)
23 import System.FilePath ((</>))
24 import System.IO (FilePath, IO)
25 import Text.Show (Show(..), showString)
26 import qualified Control.Monad.Trans.Reader as R
27 import qualified Data.ByteString.Lazy as BSL
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Encoding as TL
30 import qualified System.Directory as Directory
31 import qualified System.FilePath as FilePath
32 import qualified System.IO.Error as IO
33 import qualified Text.Megaparsec as P
34
35 import Language.TCT.Debug
36 import Language.TCT.Tree
37 import Language.TCT.Cell
38 import Language.TCT.Read.Cell
39 import Language.TCT.Read.Elem
40 import Language.TCT.Read.Tree
41 import Language.TCT.Read.Token
42
43 -- | Parsing is done in two phases:
44 --
45 -- 1. indentation-sensitive parsing on 'TL.Text'
46 -- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1.
47 readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
48 readTCT inp = do
49 inpFileCanon <-
50 Directory.makeRelativeToCurrentDirectory =<<
51 Directory.canonicalizePath inp
52 goFile (NodeHeader HeaderDash) $
53 Span inpFileCanon pos1 pos1:|[]
54 where
55 goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node)))
56 goFile parentNode spans@(Span{span_file=inpFile}:|inpPath)
57 | any (\Span{span_file} -> span_file == inpFile) inpPath =
58 return $ Left $ ErrorReadIncludeLoop spans
59 | otherwise = do
60 readFile inpFile >>= \case
61 Left err -> return $ Left $ ErrorReadIO spans err
62 Right inpText ->
63 case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
64 Left err -> return $ Left $ ErrorReadParser err
65 Right trees ->
66 (join <$>) . sequence <$>
67 traverse
68 (goTree parentNode)
69 (debug0 "readTCTWithIncludes" trees)
70 goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
71 goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) =
72 case nod of
73 NodeLower{} -> return $ Right $ pure t
74 -- NOTE: preserve NodeText ""
75 NodeText n | TL.null n -> return $ Right $ pure t
76 NodeText n ->
77 case parNode of
78 NodeHeader HeaderBar{} -> return $ Right $ pure t
79 NodeHeader HeaderEqual{} -> return $ Right $ pure t
80 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
81 _ ->
82 return $ left ErrorReadParser $
83 parseTokens <$> parseLexemes (Cell ss n)
84 NodeHeader (HeaderDotSlash incFile) -> do
85 incFileCanon <-
86 Directory.makeRelativeToCurrentDirectory =<<
87 Directory.canonicalizePath
88 (FilePath.takeDirectory span_file </> incFile)
89 ((pure . Tree c <$>) <$>) $
90 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
91 -- to merge nodes accross files, when writing XML
92 goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss
93 _ ->
94 (pure . Tree c . join <$>) .
95 sequence <$> traverse (goTree nod') ts
96 where
97 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
98 nod' = case nod of
99 NodePara -> parNode
100 _ -> nod
101
102 readFile :: FilePath -> IO (Either IO.IOError TL.Text)
103 readFile fp =
104 (Right . TL.decodeUtf8 <$> BSL.readFile fp)
105 `IO.catchIOError` \e ->
106 if IO.isAlreadyInUseError e
107 || IO.isDoesNotExistError e
108 || IO.isPermissionError e
109 then return $ Left e
110 else IO.ioError e
111
112 -- | Useful when producing only an exact source file rendition.
113 readTCTWithoutIncludes ::
114 FilePath -> TL.Text ->
115 Either ErrorRead (Trees (Cell Node))
116 readTCTWithoutIncludes inp txt = do
117 trs <-
118 left ErrorReadParser $
119 (`R.runReader` []) $
120 P.runParserT (p_Trees <* P.eof) inp txt
121 join <$> traverse (go $ NodeHeader HeaderDash)
122 (debug0 "readTCT" trs)
123 where
124 go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
125 go parent t@(Tree c@(Cell ssn nod) ts) =
126 case nod of
127 NodeLower{} -> Right $ pure t
128 -- NOTE: preserve NodeText ""
129 NodeText n | TL.null n -> Right $ pure t
130 NodeText n ->
131 case parent of
132 NodeHeader HeaderBar{} -> Right $ pure t
133 NodeHeader HeaderEqual{} -> Right $ pure t
134 NodeHeader HeaderDashDash{} -> Right $ pure t
135 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n)
136 _ -> pure . Tree c . join <$> traverse (go nod') ts
137 where
138 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
139 nod' = case nod of
140 NodePara -> parent
141 _ -> nod
142
143 -- * Type 'ErrorRead'
144 data ErrorRead
145 = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
146 | ErrorReadIncludeLoop Spans
147 | ErrorReadIO Spans IO.IOError
148 deriving (Eq)
149 instance Show ErrorRead where
150 showsPrec _p = \case
151 ErrorReadParser e ->
152 showString (P.parseErrorPretty e)
153 ErrorReadIncludeLoop (Span{..}:|spans) ->
154 showString "ErrorReadIncludeLoop" .
155 showString "\n " . showString span_file .
156 showString (foldMap (\s -> "\n included by "<>show s) spans)
157 ErrorReadIO (_:|spans) err ->
158 showString "ErrorReadIO" .
159 showString "\n " . showsPrec 10 err .
160 showString (foldMap (\s -> "\n in "<>show s) spans)