]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read.hs
Use Text.Lazy to speedup Token parsing.
[doclang.git] / Language / TCT / Read.hs
1 {-# LANGUAGE TypeFamilies #-}
2 module Language.TCT.Read
3 ( module Language.TCT.Read.Tree
4 , module Language.TCT.Read.Token
5 , module Language.TCT.Read
6 ) where
7
8 import Control.Applicative (Applicative(..))
9 import Data.Char (Char)
10 import Data.Either (Either(..))
11 import Data.Function (($))
12 import Data.Functor ((<$>))
13 import Data.Maybe (Maybe(..))
14 import Data.Semigroup (Semigroup(..))
15 import Data.Text (Text)
16 import Data.Traversable (Traversable(..))
17 import Data.Tuple (fst, snd)
18 import Prelude (fromIntegral)
19 import System.IO (FilePath)
20 import Text.Show (Show(..))
21 import qualified Text.Megaparsec as P
22 import qualified Data.Text.Lazy as TL
23
24 import Language.TCT.Tree
25 import Language.TCT.Token
26 import Language.TCT.Read.Tree
27 import Language.TCT.Read.Token
28
29 import Debug.Trace (trace)
30
31 readTCT ::
32 FilePath -> Text ->
33 Either (P.ParseError (P.Token TL.Text) P.Dec)
34 (Trees (Cell Key) (Cell Tokens))
35 readTCT inp txt = do
36 tct <- P.runParser (p_Trees <* P.eof) inp txt
37 (`traverse`tct) {-(<$> trace (show $ PrettyTree tct) tct)-} $ \tr ->
38 sequence $ (`mapTreeWithKey`tr) $ \key (Cell pos posEnd t) ->
39 case key of
40 -- Verbatim Keys
41 Just (unCell -> KeyBar{}) ->
42 Right $ Cell pos posEnd $ tokens [TokenPlain t]
43 Just (unCell -> KeyLower{}) ->
44 Right $ Cell pos posEnd $ tokens [TokenPlain t]
45 Just (unCell -> KeyEqual{}) ->
46 Right $ Cell pos posEnd $ tokens [TokenPlain t]
47 -- Token Keys
48 _ ->
49 Cell pos posEnd <$>
50 P.runParser (do
51 P.setTabWidth $ P.unsafePos $ fromIntegral $ columnPos pos
52 P.setPosition $ P.SourcePos inp
53 (P.unsafePos $ fromIntegral $ linePos pos)
54 (P.unsafePos $ fromIntegral $ columnPos pos)
55 p_Tokens <* P.eof
56 ) inp (StreamCell t)
57
58 -- * Type 'StreamCell'
59 -- | Wrap 'Text' to have a 'P.Stream' instance
60 -- whose 'P.updatePos' method abuses the tab width state
61 -- to instead pass the line indent.
62 -- This in order to report correct 'P.SourcePos'
63 -- when parsing a 'Cell' containing newlines.
64 newtype StreamCell = StreamCell TL.Text
65 instance P.Stream StreamCell where
66 type Token StreamCell = Char
67 uncons (StreamCell t) = (StreamCell <$>) <$> P.uncons t
68 updatePos _s indent apos@(P.SourcePos n l c) ch = (apos, npos)
69 where
70 u = P.unsafePos 1
71 npos =
72 case ch of
73 '\n' -> P.SourcePos n (l <> u) indent
74 _ -> P.SourcePos n l (c <> u)