]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read.hs
WIP add paragraph recognition, enabling footnote with note: instead of only <note>.
[doclang.git] / Language / TCT / Read.hs
1 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
2 {-# LANGUAGE TypeFamilies #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Read
5 ( module Language.TCT.Read.Tree
6 , module Language.TCT.Read.Token
7 , module Language.TCT.Read.Cell
8 , module Language.TCT.Read
9 ) where
10
11 import Control.Monad (Monad(..), join)
12 import Control.Applicative (Applicative(..))
13 import Data.Char (Char)
14 import Data.Either (Either(..))
15 import Data.Eq (Eq(..))
16 import Data.Function (($), (.))
17 import Data.Functor (Functor(..), (<$>))
18 import Data.Foldable (toList)
19 import Data.Maybe (Maybe(..))
20 import Data.Ord (Ord(..))
21 import Data.Proxy (Proxy(..))
22 import Data.Semigroup (Semigroup(..))
23 import Data.Sequence (Seq)
24 import Data.String (IsString)
25 import Data.Text (Text)
26 import Data.Traversable (Traversable(..))
27 import Data.TreeSeq.Strict (Tree)
28 import Data.Tuple (snd)
29 import Data.Void (Void)
30 import System.IO (FilePath)
31 import Text.Show (Show(..))
32 import qualified Data.Text as Text
33 import qualified Text.Megaparsec as P
34 import qualified Data.Sequence as Seq
35 import qualified Data.TreeSeq.Strict as Tree
36
37 import Language.TCT.Tree
38 import Language.TCT.Token
39 import Language.TCT.Cell
40 import Language.TCT.Read.Cell
41 import Language.TCT.Read.Tree
42 import Language.TCT.Read.Token
43
44 import Debug.Trace (trace)
45
46 -- * Type 'TCT'
47 type TCT = Tree (Cell Key) Tokens
48
49 -- * Type 'TCTs'
50 type TCTs = Seq TCT
51
52 readTCTs ::
53 FilePath -> Text ->
54 Either (P.ParseError (P.Token Text) (P.ErrorFancy Void)) TCTs
55 readTCTs inp txt = do
56 trs <- P.runParser (p_Trees <* P.eof) inp txt
57 traverse (go Nothing) $ trace ("### TRS ###\n"<>show (Tree.Pretty trs)) trs
58 where
59 go ::
60 Maybe Key ->
61 Tree (Cell Key) (Cell Value) ->
62 Either (P.ParseError (P.Token Text) (P.ErrorFancy Void)) TCT
63 go k (Tree0 v) =
64 case k of
65 Just KeyBar{} -> Right $ Tree0 $ tokens [Tree0 $ TokenPlain <$> v]
66 Just KeyLower{} -> Right $ Tree0 $ tokens [Tree0 $ TokenPlain <$> v]
67 Just KeyEqual{} -> Right $ Tree0 $ tokens [Tree0 $ TokenPlain <$> v]
68 _ -> Tree0 . parseTokens <$> parseLexemes v
69 go _ (TreeN c@(unCell -> key) ts) =
70 case key of
71 KeyBar{} -> TreeN c <$> traverse (go (Just key)) ts
72 KeyLower{} -> TreeN c <$> traverse (go (Just key)) ts
73 KeyEqual{} -> TreeN c <$> traverse (go (Just key)) ts
74 KeyPara -> do
75 ls <-
76 (`traverse` Seq.reverse ts) $ \case
77 Tree0 v -> parseLexemes v
78 TreeN ck@(unCell -> k) vs ->
79 (pure . LexemeTree . TreeN ck <$>) $
80 traverse (go (Just k)) vs
81 let toks = parseTokens $ join $ toList ls
82 return $ Tree0 toks
83 _ -> TreeN c <$> traverse (go (Just key)) ts
84 parseLexemes ::
85 Cell Value ->
86 Either (P.ParseError (P.Token Text) (P.ErrorFancy Void)) [Lexeme]
87 parseLexemes (Cell bp _ep v) =
88 snd $
89 P.runParser'
90 (p_Lexemes <* P.eof)
91 P.State
92 { P.stateInput = v
93 , P.statePos = pure $ P.SourcePos inp
94 (P.mkPos $ linePos bp)
95 (P.mkPos $ columnPos bp)
96 , P.stateTabWidth = P.pos1
97 , P.stateTokensProcessed = 0
98 }
99
100 -- * Type 'StreamCell'
101 -- | Wrap 'Text' to have a 'P.Stream' instance
102 -- whose 'P.advance1' method abuses the tab width state
103 -- to instead pass the line indent.
104 -- This in order to report correct 'P.SourcePos'
105 -- when parsing a 'Cell' containing newlines.
106 newtype StreamCell = StreamCell { unStreamCell :: Text }
107 deriving (IsString,Eq,Ord)
108 instance P.Stream StreamCell where
109 type Token StreamCell = Char
110 type Tokens StreamCell = StreamCell
111 take1_ (StreamCell t) = (StreamCell <$>) <$> P.take1_ t
112 takeN_ n (StreamCell t) =
113 (\(ts,s) -> (StreamCell ts, StreamCell s)) <$>
114 P.takeN_ n t
115 takeWhile_ f (StreamCell t) =
116 (\(ts,s) -> (StreamCell ts, StreamCell s)) $
117 P.takeWhile_ f t
118 tokensToChunk _s ts = StreamCell (P.tokensToChunk (Proxy::Proxy Text) ts)
119 chunkToTokens _s (StreamCell ch) = P.chunkToTokens (Proxy::Proxy Text) ch
120 chunkLength _s (StreamCell ch) = P.chunkLength (Proxy::Proxy Text) ch
121 advance1 _s = advance1
122 advanceN _s indent pos (StreamCell t) = Text.foldl' (advance1 indent) pos t
123
124 advance1 :: P.Pos -> P.SourcePos -> Char -> P.SourcePos
125 advance1 indent (P.SourcePos n line col) c =
126 case c of
127 '\n' -> P.SourcePos n (line <> P.pos1) indent
128 _ -> P.SourcePos n line (col <> P.pos1)