{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} module Hdoc.DTC.Analyze.Index where -- import Data.Eq (Eq(..)) -- import Text.Show (Show(..)) -- import qualified Control.Monad.Trans.Writer as W -- import qualified Data.HashMap.Strict as HM -- import qualified Data.Tree as Tree -- import qualified Data.TreeSeq.Strict as TS -- import qualified Hjugement as MJ import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Either (Either(..)) import Data.Foldable (Foldable(..), concat) import Data.Function (($), (.), const) import Data.Functor ((<$>), (<$)) import Data.Functor.Compose (Compose(..)) import Data.Map.Strict (Map) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, (|>)) import Data.Traversable (Traversable(..)) import Data.TreeMap.Strict (TreeMap(..)) import Data.TreeSeq.Strict (Tree(..), tree0) import qualified Control.Monad.Trans.Reader as R import qualified Control.Monad.Trans.State as S import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Sequence as Seq import qualified Data.Strict.Maybe as Strict import qualified Data.Text.Lazy as TL import qualified Data.TreeMap.Strict as TM import Hdoc.DTC.Document as DTC -- import Hdoc.TCT.Cell as TCT -- import qualified Hdoc.XML as XML -- * Type 'IrefsByWords' type IrefsByWords = TM.TreeMap Word (Seq (Either Head Section)) irefsOfTerms :: Terms -> IrefsByWords irefsOfTerms = TM.fromList const . (>>= f) . concat where f [] = [] f ws = maybe [] (\p -> [(p,Seq.empty)]) $ pathFromWords ws wordify :: TL.Text -> Words wordify = List.reverse . go [] where go :: Words -> TL.Text -> Words go acc t = case TL.span Char.isAlphaNum t of ("",_) -> case TL.span Char.isSpace t of ("",_) -> case TL.uncons t of Nothing -> acc Just (c,r) -> go (Word (TL.singleton c) : acc) r (_s,r) -> go (Space : acc) r (w,r) -> go (Word w : acc) r plainifyWord :: WordOrSpace -> TL.Text plainifyWord = \case Word w -> w Space -> " " plainifyWords :: Words -> TL.Text plainifyWords = TL.concat . (plainifyWord <$>) termsByChar :: Terms -> Map Char Terms termsByChar = foldr (\aliases acc -> case aliases of (Word w:_):_ | not (TL.null w) -> Map.insertWith (<>) (Char.toUpper $ TL.index w 0) [aliases] acc _ -> acc ) Map.empty -- * Type 'Reader' data Reader = Reader { reader_section :: Either Head Section } instance Default Reader where def = Reader { reader_section = Left def } -- * Type 'State' type State = IrefsByWords -- * Class 'Indexify' class Indexify a where indexify :: a -> Compose (R.Reader Reader) (S.State State) a instance Indexify Document where indexify Document{..} = Compose $ R.local (\ro -> ro{reader_section = Left head}) $ getCompose $ Document <$> indexify head <*> traverse indexify body instance Indexify Head where indexify h@Head{..} = pure h instance Indexify (Tree BodyNode) where indexify (Tree n ts) = case n of BodyBlock b -> Tree . BodyBlock <$> indexify b <*> traverse indexify ts BodySection section@Section{..} -> Compose $ R.local (\ro -> ro{reader_section = Right section}) $ getCompose $ Tree . BodySection <$> indexify section <*> traverse indexify ts instance Indexify Section where indexify Section{..} = Section section_posXML section_attrs <$> indexify section_title <*> pure section_aliases <*> traverse indexify section_judgments instance Indexify Block where indexify b = case b of BlockPara p -> BlockPara <$> indexify p BlockBreak{} -> pure b BlockToC{} -> pure b BlockToF{} -> pure b BlockAside{..} -> BlockAside posXML attrs <$> traverse indexify blocks BlockIndex{..} -> pure b BlockFigure{..} -> BlockFigure posXML type_ attrs <$> traverse indexify mayTitle <*> traverse indexify paras BlockReferences{..} -> BlockReferences posXML attrs <$> traverse indexify refs BlockJudges js -> BlockJudges <$> indexify js BlockGrades{..} -> BlockGrades posXML attrs <$> indexify scale instance Indexify Para where indexify = \case ParaItem{..} -> ParaItem <$> indexify item ParaItems{..} -> ParaItems posXML attrs <$> traverse indexify items instance Indexify ParaItem where indexify = \case ParaPlain plain -> ParaPlain <$> indexify plain ParaOL items -> ParaOL <$> traverse indexify items ParaUL items -> ParaUL <$> traverse (traverse indexify) items ParaQuote{..} -> ParaQuote type_ <$> traverse indexify paras p@ParaArtwork{} -> pure p p@ParaComment{} -> pure p ParaJudgment j -> ParaJudgment <$> indexify j instance Indexify ListItem where indexify ListItem{..} = ListItem name <$> traverse indexify paras instance Indexify Plain where indexify = traverse indexify instance Indexify (Tree PlainNode) where indexify (Tree n ts) = case n of PlainIref{..} -> case pathFromWords iref_term of Nothing -> Tree n <$> traverse indexify ts Just words -> Compose $ R.ask >>= \ro -> getCompose $ Tree n <$ Compose (pure (S.modify' $ \state_irefsByWords -> TM.insert (<>) words (pure $ reader_section ro) $ state_irefsByWords )) <*> traverse indexify ts PlainText txt -> -- TODO: introduce PlainGroup only when necessary? Tree PlainGroup <$> indexifyWords (wordify txt) _ -> Tree n <$> traverse indexify ts instance Indexify Title where indexify (Title p) = Title <$> indexify p instance Indexify About where indexify About{..} = About about_headers <$> traverse indexify about_titles <*> pure about_url <*> traverse indexify about_authors <*> traverse indexify about_editor <*> traverse indexify about_date <*> pure about_tags <*> pure about_links <*> pure about_series <*> traverse indexify about_includes instance Indexify Entity where indexify = pure -- TODO: to be coded instance Indexify Date where indexify = pure -- TODO: to be coded instance Indexify Include where indexify = pure -- TODO: to be coded instance Indexify Reference where indexify Reference{..} = do Reference reference_posXML reference_locTCT reference_id <$> indexify reference_about {- st@State{state_collect=All{..}, ..} <- S.get let targets = HM.lookupDefault Seq.empty reference_id all_reference case toList targets of [] -> undefined [_] -> do _ -> do let err = HM.insertWith (flip (<>)) reference_id (pure reference_locTCT) $ errors_reference_ambiguous state_errors S.put st { state_errors = state_errors { errors_reference_ambiguous = err } } about <- indexify reference_about return $ Reference { reference_error = Just $ ErrorAnchor_Ambiguous $ Nat1 $ length $ err HM.!reference_id , reference_about = about , .. } -} instance Indexify Judges where indexify Judges{..} = Judges judges_locTCT judges_posXML judges_attrs <$> traverse (traverse indexify) judges_byName instance Indexify [Grade] where indexify = traverse indexify instance Indexify Judgment where indexify Judgment{..} = Judgment judgment_opinionsByChoice judgment_judges judgment_grades judgment_posXML judgment_locTCT judgment_judgesId judgment_gradesId judgment_importance <$> traverse indexify judgment_question <*> traverse indexify judgment_choices instance Indexify Choice where indexify Choice{..} = Choice choice_locTCT choice_posXML <$> traverse indexify choice_title <*> traverse indexify choice_opinions instance Indexify Opinion where indexify Opinion{..} = Opinion opinion_locTCT opinion_posXML opinion_judge opinion_grade opinion_importance <$> traverse indexify opinion_comment instance Indexify Grade where indexify Grade{..} = Grade grade_posXML grade_name grade_color grade_isDefault <$> traverse indexify grade_title instance Indexify Judge where indexify Judge{..} = Judge judge_locTCT judge_posXML judge_name <$> traverse indexify judge_title <*> pure judge_defaultGrades indexifyWords :: Words -> Compose (R.Reader Reader) (S.State IrefsByWords) Plain indexifyWords ws = Compose $ R.ask >>= \case ro -> pure $ go mempty ws where go :: Plain -> Words -> S.State IrefsByWords Plain go acc inp = case inp of [] -> return acc Space : next -> go (acc |> tree0 (PlainText " ")) next Word w : next -> goWords [] inp >>= \case Nothing -> go (acc |> tree0 (PlainText w)) next Just (ls, ns) -> do let iref_term = List.reverse ls let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> iref_term go (acc |> Tree PlainIref{iref_term} lines) ns goWords :: Words -> Words -> S.State IrefsByWords (Maybe (Words, Words)) goWords prev inp = do TM.TreeMap irefsByWord <- S.get case inp of [] -> return Nothing curr@Space : next -> goWords (curr:prev) next curr@(Word w) : next -> case Map.lookup w irefsByWord of Nothing -> return Nothing Just nod@TM.Node{..} -> do let prev' = curr:prev S.put node_descendants case node_value of Strict.Nothing | null node_descendants -> return Nothing | otherwise -> goWords prev' next <* S.modify' (\rs -> TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord) Strict.Just irefs -> goWords prev' next >>= \case Nothing -> do S.put $ TM.TreeMap $ Map.insert w nod{TM.node_value = Strict.Just $ reader_section ro Seq.<|irefs} irefsByWord return $ Just (prev', next) r@Just{} -> do S.modify' $ \rs -> TM.TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord return r