]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read.hs
Add basic support for HeaderDotSlash including.
[doclang.git] / Language / TCT / Read.hs
1 module Language.TCT.Read
2 ( module Language.TCT.Read.Tree
3 , module Language.TCT.Read.Token
4 , module Language.TCT.Read.Cell
5 , module Language.TCT.Read
6 ) where
7
8 import Control.Applicative (Applicative(..))
9 import Control.Arrow (left)
10 import Control.Monad (Monad(..), join)
11 import Data.Bool
12 import Data.Either (Either(..))
13 import Data.Eq (Eq)
14 import Data.Function (($), (.))
15 import Data.Functor ((<$>))
16 import Data.Ord (Ord(..))
17 import Data.Set (Set)
18 import Data.Traversable (Traversable(..))
19 import Data.TreeSeq.Strict (Tree(..), Trees)
20 import Data.Void (Void)
21 import System.FilePath ((</>))
22 import System.IO (FilePath, IO)
23 import Text.Show (Show(..), showParen, showString, showChar)
24 import qualified Data.ByteString.Lazy as BSL
25 import qualified Data.Set as Set
26 import qualified Data.Text.Lazy as TL
27 import qualified Data.Text.Lazy.Encoding as TL
28 import qualified System.FilePath as FilePath
29 import qualified Text.Megaparsec as P
30
31 import Language.TCT.Debug
32 import Language.TCT.Tree
33 import Language.TCT.Cell
34 import Language.TCT.Read.Cell
35 import Language.TCT.Read.Tree
36 import Language.TCT.Read.Token
37
38 -- | Parsing is done in two phases:
39 --
40 -- 1. indentation-sensitive parsing on 'TL.Text'
41 -- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1.
42 readTCT ::
43 FilePath -> TL.Text ->
44 Either ErrorRead (Trees (Cell Node))
45 readTCT inp txt = do
46 trs <- left ErrorReadParser $ P.runParser (p_Trees <* P.eof) inp txt
47 join <$> traverse (go $ NodeHeader HeaderDash)
48 (debug0 "readTCT" trs)
49 where
50 go :: Node -> Tree (Cell Node) -> Either ErrorRead (Trees (Cell Node))
51 go parent t@(Tree c@(Cell bn en nod) ts) =
52 case nod of
53 NodeLower{} -> Right $ pure t
54 -- NOTE: preserve NodeText ""
55 NodeText n | TL.null n -> Right $ pure t
56 NodeText n ->
57 case parent of
58 NodeHeader HeaderBar{} -> Right $ pure t
59 NodeHeader HeaderEqual{} -> Right $ pure t
60 NodeHeader HeaderDashDash{} -> Right $ pure t
61 _ -> left ErrorReadParser $ parseTokens <$> parseLexemes inp (Cell bn en n)
62 _ -> pure . Tree c . join <$> traverse (go nod') ts
63 where
64 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
65 nod' = case nod of
66 NodePara -> parent
67 _ -> nod
68
69 readFile :: FilePath -> IO TL.Text
70 readFile fp = TL.decodeUtf8 <$> BSL.readFile fp
71
72 readTCTrec :: FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
73 readTCTrec = goFile Set.empty (NodeHeader HeaderDash)
74 where
75 goFile :: Set FilePath -> Node -> FilePath -> IO (Either ErrorRead (Trees (Cell Node)))
76 goFile inputFiles parentNode inpFile
77 | Set.member inpFileNorm inputFiles =
78 return $ Left $ ErrorReadIncludeLoop inpFileNorm
79 | otherwise = do
80 inpText <- readFile inpFileNorm
81 case P.runParser (p_Trees <* P.eof) inpFileNorm inpText of
82 Left err -> return $ Left $ ErrorReadParser err
83 Right trees ->
84 (join <$>) . sequence <$>
85 traverse
86 (goTree (Set.insert inpFileNorm inputFiles) parentNode)
87 (debug0 "readTCTrec" trees)
88 where
89 inpFileNorm = FilePath.normalise inpFile
90 goTree :: Set FilePath -> Node -> Tree (Cell Node) -> IO (Either ErrorRead (Trees (Cell Node)))
91 goTree inpFiles parNode t@(Tree c@(Cell bn en nod) ts) =
92 case nod of
93 NodeLower{} -> return $ Right $ pure t
94 -- NOTE: preserve NodeText ""
95 NodeText n | TL.null n -> return $ Right $ pure t
96 NodeText n ->
97 case parNode of
98 NodeHeader HeaderBar{} -> return $ Right $ pure t
99 NodeHeader HeaderEqual{} -> return $ Right $ pure t
100 NodeHeader HeaderDashDash{} -> return $ Right $ pure t
101 _ ->
102 return $ left ErrorReadParser $
103 parseTokens <$> parseLexemes inpFileNorm (Cell bn en n)
104 NodeHeader (HeaderDotSlash incFile) ->
105 goFile inpFiles parNode $
106 FilePath.takeDirectory inpFileNorm </> incFile
107 _ ->
108 (pure . Tree c . join <$>) .
109 sequence <$> traverse (goTree inpFiles nod') ts
110 where
111 -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
112 nod' = case nod of
113 NodePara -> parNode
114 _ -> nod
115
116 -- * Type 'ErrorRead'
117 data ErrorRead
118 = ErrorReadParser (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
119 | ErrorReadIncludeLoop FilePath
120 deriving (Eq)
121 instance Show ErrorRead where
122 showsPrec p = \case
123 ErrorReadParser e ->
124 showParen (p >= 10) $
125 showString (P.parseErrorPretty e)
126 ErrorReadIncludeLoop file ->
127 showString "ErrorReadIncludeLoop" .
128 showChar ' ' . showsPrec 10 file