{-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Compute an Index for a DTC. module Language.DTC.Index where import Control.Applicative (Applicative(..)) import Control.Category import Control.Monad (Monad(..), mapM) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Foldable (Foldable(..), concat) import Data.Function (($), const) import Data.Functor ((<$>)) import Data.Map.Strict (Map) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.Text (Text) import Data.Traversable (Traversable(..)) import Data.TreeMap.Strict (TreeMap(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (Num(..)) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Sequence as Seq import qualified Data.Strict.Maybe as Strict import qualified Data.Text as Text import qualified Data.TreeMap.Strict as TreeMap import qualified Data.TreeSeq.Strict as Tree import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words) import Language.XML (XmlPos(..)) import qualified Language.DTC.Document as DTC -- import Debug.Trace (trace) termsByChar :: Terms -> Map Char Terms termsByChar = foldr (\aliases acc -> case aliases of (Word w:_):_ | not (Text.null w) -> Map.insertWith (<>) (Char.toUpper $ Text.index w 0) [aliases] acc _ -> acc ) Map.empty -- * Type 'Ref' data Ref = Ref { term :: Words , count :: Count , section :: XmlPos } deriving (Show) -- ** Type 'Path' type Path = TreeMap.Path Word -- ** Type 'Refs' type Refs = TreeMap Word [Ref] refsOfTerms :: Terms -> Refs refsOfTerms = TreeMap.fromList const . (>>= f) . concat where f [] = [] f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws pathFromWords :: Words -> Maybe Path pathFromWords ws = case ws >>= unSpace of p:ps | not (Text.null p) -> Just (TreeMap.path p ps) _ -> Nothing where unSpace = \case Space -> [] Word w -> [w] -- * Type 'State' data State = State { state_refs :: Refs , state_text :: Bool , state_section :: XmlPos } state :: State state = State { state_refs = mempty , state_text = True , state_section = def } -- * Class 'Indexify' class Indexify a where indexify :: a -> S.State State a instance Indexify (Tree k a) => Indexify [Tree k a] where indexify = mapM indexify instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where indexify = \case Tree0 v -> Tree0 <$> indexify v TreeN k v -> case k of DTC.Section{..} -> do before@State{state_section} <- S.get S.put before{state_section = pos} t <- TreeN <$> indexify k <*> indexify v after <- S.get S.put after{state_section} return t instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where indexify = mapM indexify {- instance Indexify a => Indexify (Seq a) where indexify = mapM indexify -} instance Indexify DTC.BodyKey where indexify = \case DTC.Section{..} -> DTC.Section pos attrs <$> indexify title <*> pure aliases instance Indexify DTC.BodyValue where indexify = \case d@DTC.ToC{} -> pure d d@DTC.ToF{} -> pure d d@DTC.Index{} -> pure d DTC.Figure{..} -> DTC.Figure pos attrs type_ <$> indexify title <*> indexify blocks DTC.Block v -> DTC.Block <$> indexify v instance Indexify [DTC.Reference] where indexify = mapM indexify instance Indexify [DTC.Block] where indexify = mapM indexify instance Indexify [[DTC.Block]] where indexify = mapM (mapM indexify) instance Indexify DTC.Title where indexify (DTC.Title t) = DTC.Title <$> indexify t instance Indexify DTC.Block where indexify = \case DTC.Para{..} -> DTC.Para pos attrs <$> indexify lines DTC.OL{..} -> DTC.OL pos attrs <$> indexify items DTC.UL{..} -> DTC.UL pos attrs <$> indexify items DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art d@DTC.Comment{} -> pure d instance Indexify DTC.Lines where indexify ls = Tree.joinTrees <$> traverse (traverse go) ls where go = \case DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR DTC.Plain p -> do State{..} <- S.get let (refs,ret) = indexifyWords state_section state_refs (wordify p) S.modify $ \s -> s{state_refs=refs} return ret instance Indexify DTC.Reference where indexify = return instance Indexify DTC.Artwork where indexify = return wordify :: Text -> Words wordify = List.reverse . go [] where go :: Words -> Text -> Words go acc t = case Text.span Char.isAlphaNum t of ("",_) -> case Text.span Char.isSpace t of ("",_) -> case Text.uncons t of Nothing -> acc Just (c,r) -> go (Word (Text.singleton c) : acc) r (_s,r) -> go (Space : acc) r (w,r) -> go (Word w : acc) r plainifyWord :: WordOrSpace -> Text plainifyWord = \case Word w -> w Space -> " " plainifyWords :: Words -> Text plainifyWords = Text.concat . (plainifyWord <$>) indexifyWords :: XmlPos -> Refs -> Words -> (Refs, DTC.Lines) indexifyWords section = go mempty where go :: DTC.Lines -> Refs -> Words -> (Refs, DTC.Lines) go acc refs inp = case inp of [] -> (refs, acc) Space : next -> go (acc |> Tree0 (DTC.Plain " ")) refs next Word w : next -> case goWords [] refs [] inp of Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next Just (Ref{term,count}, ls, ns, rs) -> let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in go (acc |> TreeN DTC.Iref{term, count} lines) rs ns goWords :: Words -> Refs -> Words -> Words -> Maybe (Ref, Words, Words, Refs) goWords ws m@(TreeMap refsByWord) prev inp = case inp of [] -> Nothing curr@Space : next -> goWords (curr:ws) m (curr:prev) next curr@(Word w) : next -> let words = curr:ws in case Map.lookup w refsByWord of Nothing -> Nothing Just nod@TreeMap.Node{..} -> case node_value of Strict.Nothing -> if null node_descendants then Nothing else case goWords words node_descendants (curr:prev) next of Nothing -> Nothing Just (ref, ls, ns, rs) -> Just (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord) Strict.Just refs -> case goWords words node_descendants (curr:prev) next of Nothing -> let term = List.reverse words in let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in let ref = Ref{term, count, section} in Just (ref, curr:prev, next, TreeMap $ Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord) Just (ref, ls, ns, rs) -> Just (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)