-{-# LANGUAGE TypeFamilies #-}
module Language.TCT.Read
( module Language.TCT.Read.Tree
, module Language.TCT.Read.Token
+ , module Language.TCT.Read.Cell
, module Language.TCT.Read
) where
import Control.Applicative (Applicative(..))
-import Data.Char (Char)
+import Control.Monad (Monad(..))
import Data.Either (Either(..))
+import Data.Foldable (Foldable(..))
import Data.Function (($))
import Data.Functor ((<$>))
-import Data.Maybe (Maybe(..))
-import Data.Semigroup (Semigroup(..))
-import Data.Text (Text)
import Data.Traversable (Traversable(..))
-import Data.Tuple (fst, snd)
-import Prelude (fromIntegral)
+import Data.TreeSeq.Strict (Tree(..), Trees)
+import Data.Void (Void)
import System.IO (FilePath)
+import qualified Data.Text.Lazy as TL
import qualified Text.Megaparsec as P
-import Text.Show (Show(..))
+import Language.TCT.Debug
import Language.TCT.Tree
-import Language.TCT.Token
+import Language.TCT.Cell
+import Language.TCT.Read.Cell
import Language.TCT.Read.Tree
import Language.TCT.Read.Token
-import Debug.Trace (trace)
+-- | Parsing is done in two phases:
+--
+-- 1. indentation-sensitive parsing on 'TL.Text'
+-- 2. Pair-sensitive parsing on some 'NodeText's resulting of 1.
+readTCT ::
+ FilePath -> TL.Text ->
+ Either ErrorRead (Trees (Cell Node))
+readTCT inp txt = do
+ trs <- P.runParser (p_Trees <* P.eof) inp txt
+ traverse (go NodeGroup) $ debug0 "readTCT" trs
+ where
+ go :: Node -> Tree (Cell Node) ->
+ Either (P.ParseError (P.Token TL.Text) (P.ErrorFancy Void))
+ (Tree (Cell Node))
+ go parent t@(Tree c@(Cell bn en nod) ts) =
+ case nod of
+ NodeLower{} -> Right t
+ -- NOTE: preserve NodeText ""
+ NodeText n | TL.null n -> Right t
+ NodeText n ->
+ case parent of
+ NodeHeader HeaderBar{} -> Right t
+ NodeHeader HeaderEqual{} -> Right t
+ NodeHeader HeaderDashDash{} -> Right t
+ _ -> do
+ toks <- parseTokens <$> parseLexemes inp (Cell bn en n)
+ return $
+ case toList toks of
+ [tok] -> tok
+ _ -> Tree (Cell bn en NodeGroup) toks
+ _ -> Tree c <$> traverse (go nod') ts
+ where
+ -- NOTE: skip parent 'Node's whose semantic does not change 'NodeText'
+ nod' = case nod of
+ NodeGroup -> parent
+ NodePara -> parent
+ _ -> nod
-readTreeCell ::
- FilePath -> Text ->
- Either (P.ParseError (P.Token Text) P.Dec)
- (Trees (Cell Key) (Cell Tokens))
-readTreeCell inp txt = do
- tct <- P.runParser (p_Trees <* P.eof) inp txt
- (`traverse` tct) {- $ (<$> trace (show $ PrettyTree tct) tct)-} $ \tr ->
- sequence $ (`mapTreeWithKey`tr) $ \key (Cell pos posEnd t) ->
- case key of
- -- Verbatim Keys
- Just (unCell -> KeyBar{}) -> Right $ Cell pos posEnd $ tokens [TokenPlain t]
- Just (unCell -> KeyLower{}) -> Right $ Cell pos posEnd $ tokens [TokenPlain t]
- Just (unCell -> KeyEqual{}) -> Right $ Cell pos posEnd $ tokens [TokenPlain t]
- -- Token Keys
- _ ->
- Cell pos posEnd <$>
- P.runParser (do
- P.setTabWidth $ P.unsafePos $ fromIntegral $ columnPos pos
- P.setPosition $ P.SourcePos inp
- (P.unsafePos $ fromIntegral $ linePos pos)
- (P.unsafePos $ fromIntegral $ columnPos pos)
- p_Tokens <* P.eof
- ) inp (StreamCell t)
-
-readTree ::
- FilePath -> Text ->
- Either (P.ParseError (P.Token Text) P.Dec)
- (Trees Key Tokens)
-readTree inp txt = do
- tct <- P.runParser (p_Trees <* P.eof) inp txt
- (`traverse` tct) $ \tr ->
- sequence $ (\f -> mapTreeKey unCell f tr) $ \key (Cell pos _posEnd t) ->
- case unCell <$> key of
- -- Verbatim Keys
- Just KeyBar{} -> Right $ tokens [TokenPlain t]
- Just KeyLower{} -> Right $ tokens [TokenPlain t]
- Just KeyEqual{} -> Right $ tokens [TokenPlain t]
- -- Token Keys
- _ ->
- P.runParser (do
- P.setTabWidth $ P.unsafePos $ fromIntegral $ columnPos pos
- P.setPosition $ P.SourcePos inp
- (P.unsafePos $ fromIntegral $ linePos pos)
- (P.unsafePos $ fromIntegral $ columnPos pos)
- p_Tokens <* P.eof
- ) inp (StreamCell t)
-
--- * Type 'StreamCell'
--- | Wrap 'Text' to have a 'P.Stream' instance
--- whose 'P.updatePos' method abuses the tab width state
--- to instead pass the line indent.
--- This in order to report correct 'P.SourcePos'
--- when parsing a 'Cell' containing newlines.
-newtype StreamCell = StreamCell Text
-instance P.Stream StreamCell where
- type Token StreamCell = Char
- uncons (StreamCell t) = (StreamCell <$>) <$> P.uncons t
- updatePos _s indent apos@(P.SourcePos n l c) ch = (apos, npos)
- where
- u = P.unsafePos 1
- npos =
- case ch of
- '\n' -> P.SourcePos n (l <> u) indent
- _ -> P.SourcePos n l (c <> u)
+-- * Type 'ErrorRead'
+type ErrorRead = P.ParseError (P.Token TL.Text) (P.ErrorFancy Void)