]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read.hs
Add better support for HeaderDotSlash including.
[doclang.git] / Language / TCT / Read.hs
1 module Language.TCT.Read
2 ( module Language.TCT.Read.Cell
3 , module Language.TCT.Read.Elem
4 , module Language.TCT.Read.Token
5 , module Language.TCT.Read.Tree
6 , module Language.TCT.Read
7 ) where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Arrow (left)
11 import Control.Monad (Monad(..), join, (=<<))
12 import Data.Bool
13 import Data.Either (Either(..))
14 import Data.Eq (Eq(..))
15 import Data.Foldable (Foldable(..), any)
16 import Data.Function (($), (.))
17 import Data.Functor ((<$>))
18 import Data.List.NonEmpty (NonEmpty(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Traversable (Traversable(..))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Data.Void (Void)
23 import System.FilePath ((</>))
24 import System.IO (FilePath, IO)
25 import Text.Show (Show(..), showString)
26 import qualified Control.Monad.Trans.Reader as R
27 import qualified Data.ByteString.Lazy as BSL
28 import qualified Data.Text.Lazy as TL
29 import qualified Data.Text.Lazy.Encoding as TL
30 import qualified System.Directory as Directory
31 import qualified System.FilePath as FilePath
32 import qualified Text.Megaparsec as P
33
34 import Language.TCT.Debug
35 import Language.TCT.Tree
36 import Language.TCT.Cell
37 import Language.TCT.Read.Cell
38 import Language.TCT.Read.Elem
39 import Language.TCT.Read.Tree
40 import Language.TCT.Read.Token
41
42 -- | Parsing is done in two phases:
43 --
44 -- 1. indentation-sensitive parsing on 'TL.Text'
45 -- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1.
46 readTCT :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
47 readTCT inp = do
48 inpFileCanon <-
49 Directory.makeRelativeToCurrentDirectory =<<
50 Directory.canonicalizePath inp
51 goFile (NodeHeader HeaderDash) $
52 Span inpFileCanon pos1 pos1:|[]
53 where
54 goFile :: Node -> NonEmpty Span -> IO (Either ErrorRead (Trees (Cell Node)))
55 goFile parentNode spans@(Span{span_file=inpFile}:|inpPath)
56 | any (\Span{span_file} -> span_file == inpFile) inpPath =
57 return $ Left $ ErrorReadIncludeLoop spans
58 | otherwise = do
59 inpText <- readFile inpFile
60 case (`R.runReader` inpPath) $ P.runParserT (p_Trees <* P.eof) inpFile inpText of
61 Left err -> return $ Left $ ErrorReadParser err
62 Right trees ->
63 (join <$>) . sequence <$>
64 traverse
65 (goTree parentNode)
66 (debug0 "readTCTWithIncludes" trees)
67 goTree :: Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
68 goTree parNode t@(Tree c@(Cell ss@(Span{span_file}:|_sn) nod) ts) =
69 case nod of
70 NodeLower{} -> return $ Right $ pure t
71 -- NOTE: preserve NodeText ""
72 NodeText n | TL.null n -> return $ Right $ pure t
73 NodeText n ->
74 case parNode of
75 NodeHeader HeaderBar{} -> return $ Right $ pure t
76 NodeHeader HeaderEqual{} -> return $ Right $ pure t
77 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
78 _ ->
79 return $ left ErrorReadParser $
80 parseTokens <$> parseLexemes (Cell ss n)
81 NodeHeader (HeaderDotSlash incFile) -> do
82 incFileCanon <-
83 Directory.makeRelativeToCurrentDirectory =<<
84 Directory.canonicalizePath
85 (FilePath.takeDirectory span_file </> incFile)
86 ((pure . Tree c <$>) <$>) $
87 -- NOTE: preserve HeaderDotSlash to avoid Seq.spanl
88 -- to merge nodes accross files, when writing XML
89 goFile parNode $ Span incFileCanon pos1 pos1 :| toList ss
90 _ ->
91 (pure . Tree c . join <$>) .
92 sequence <$> traverse (goTree nod') ts
93 where
94 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
95 nod' = case nod of
96 NodePara -> parNode
97 _ -> nod
98
99 readFile :: FilePath -> IO TL.Text
100 readFile fp = TL.decodeUtf8 <$> BSL.readFile fp
101
102 -- | Useful when producing only an exact source file rendition.
103 readTCTWithoutIncludes ::
104 FilePath -> TL.Text ->
105 Either ErrorRead (Trees (Cell Node))
106 readTCTWithoutIncludes inp txt = do
107 trs <-
108 left ErrorReadParser $
109 (`R.runReader` []) $
110 P.runParserT (p_Trees <* P.eof) inp txt
111 join <$> traverse (go $ NodeHeader HeaderDash)
112 (debug0 "readTCT" trs)
113 where
114 go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
115 go parent t@(Tree c@(Cell ssn nod) ts) =
116 case nod of
117 NodeLower{} -> Right $ pure t
118 -- NOTE: preserve NodeText ""
119 NodeText n | TL.null n -> Right $ pure t
120 NodeText n ->
121 case parent of
122 NodeHeader HeaderBar{} -> Right $ pure t
123 NodeHeader HeaderEqual{} -> Right $ pure t
124 NodeHeader HeaderDashDash{} -> Right $ pure t
125 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes (Cell ssn n)
126 _ -> pure . Tree c . join <$> traverse (go nod') ts
127 where
128 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
129 nod' = case nod of
130 NodePara -> parent
131 _ -> nod
132
133 -- * Type 'ErrorRead'
134 data ErrorRead
135 = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
136 | ErrorReadIncludeLoop Spans
137 deriving (Eq)
138 instance Show ErrorRead where
139 showsPrec _p = \case
140 ErrorReadParser e ->
141 showString (P.parseErrorPretty e)
142 ErrorReadIncludeLoop (Span{..}:|spans) ->
143 showString "ErrorReadIncludeLoop" .
144 showString "\n " . showString span_file .
145 showString (foldMap (\s -> "\n included by "<>show s) spans)