{-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Compute an Index for a DTC. module Language.DTC.Index where import Control.Applicative (Applicative(..)) import Control.Category import Control.Monad (Monad(..), mapM, join) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Foldable (Foldable(..), concat) import Data.Function (($), const) import Data.Functor ((<$>)) import Data.Map.Strict (Map) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.Text (Text) import Data.Traversable (Traversable(..)) import Data.TreeMap.Strict (TreeMap(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (Num(..)) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Sequence as Seq import qualified Data.Strict.Maybe as Strict import qualified Data.Text as Text import qualified Data.TreeMap.Strict as TreeMap import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words) import Language.XML (XmlPos(..)) import qualified Language.DTC.Document as DTC termsByChar :: Terms -> Map Char Terms termsByChar = foldr (\aliases acc -> case aliases of (Word w:_):_ | not (Text.null w) -> Map.insertWith (<>) (Char.toUpper $ Text.index w 0) [aliases] acc _ -> acc ) Map.empty -- * Type 'Ref' data Ref = Ref { term :: Words , count :: Count , section :: XmlPos } deriving (Show) -- ** Type 'Path' type Path = TreeMap.Path Word -- ** Type 'Refs' type Refs = TreeMap Word [Ref] refsOfTerms :: Terms -> Refs refsOfTerms = TreeMap.fromList const . (>>= f) . concat where f [] = [] f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws pathFromWords :: Words -> Maybe Path pathFromWords ws = case ws >>= unSpace of p:ps | not (Text.null p) -> Just (TreeMap.path p ps) _ -> Nothing where unSpace = \case Space -> [] Word w -> [w] -- * Type 'State' data State = State { state_refs :: Refs , state_text :: Bool , state_section :: XmlPos } state :: State state = State { state_refs = mempty , state_text = True , state_section = def } -- * Class 'Indexify' class Indexify a where indexify :: a -> S.State State a instance Indexify (Tree k a) => Indexify [Tree k a] where indexify = mapM indexify instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where indexify = \case Tree0 v -> Tree0 <$> indexify v TreeN k v -> case k of DTC.Section{..} -> do before@State{state_section} <- S.get S.put before{state_section = pos} t <- TreeN <$> indexify k <*> indexify v after <- S.get S.put after{state_section} return t instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where indexify = mapM indexify instance Indexify DTC.BodyKey where indexify = \case DTC.Section{..} -> DTC.Section pos attrs <$> indexify title <*> pure aliases instance Indexify DTC.BodyValue where indexify = \case d@DTC.ToC{} -> pure d d@DTC.ToF{} -> pure d d@DTC.Index{} -> pure d DTC.Figure{..} -> DTC.Figure pos attrs type_ <$> indexify title <*> indexify blocks DTC.Block v -> DTC.Block <$> indexify v instance Indexify [DTC.Reference] where indexify = mapM indexify instance Indexify [DTC.Block] where indexify = mapM indexify instance Indexify [[DTC.Block]] where indexify = mapM (mapM indexify) instance Indexify DTC.Title where indexify (DTC.Title t) = DTC.Title <$> indexify t instance Indexify DTC.Block where indexify = \case DTC.Para{..} -> DTC.Para pos attrs <$> indexify para DTC.OL{..} -> DTC.OL pos attrs <$> indexify items DTC.UL{..} -> DTC.UL pos attrs <$> indexify items DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art d@DTC.Comment{} -> pure d instance Indexify DTC.Para where indexify ls = join <$> traverse indexifyLines ls instance Indexify DTC.Reference where indexify = return instance Indexify DTC.Artwork where indexify = return indexifyLines :: DTC.Lines -> S.State State DTC.Para indexifyLines = \case Tree0 a -> indexifyPlain a TreeN k@DTC.Iref{term} ts | Just words <- pathFromWords term -> do State{state_refs, state_section} <- S.get case TreeMap.lookup words state_refs of Strict.Nothing -> Seq.singleton . TreeN k . join <$> traverse indexifyLines ts Strict.Just refs -> do let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 let ref = Ref{term, count, section=state_section} S.modify $ \s -> s{state_refs= TreeMap.insert const words (ref:refs) state_refs} Seq.singleton . TreeN DTC.Iref{DTC.term, DTC.count} . join <$> traverse indexifyLines ts TreeN k ts -> Seq.singleton . TreeN k . join <$> traverse indexifyLines ts indexifyPlain :: DTC.LineValue -> S.State State DTC.Para indexifyPlain = \case DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR DTC.Plain p -> do State{..} <- S.get let (refs,ts) = indexifyWords state_section state_refs (wordify p) S.modify $ \s -> s{state_refs=refs} return ts indexifyWords :: XmlPos -> Refs -> Words -> (Refs, DTC.Para) indexifyWords section = go mempty where go :: DTC.Para -> Refs -> Words -> (Refs, DTC.Para) go acc refs inp = case inp of [] -> (refs, acc) Space : next -> go (acc |> Tree0 (DTC.Plain " ")) refs next Word w : next -> case goWords [] refs [] inp of Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next Just (Ref{term,count}, ls, ns, rs) -> let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in go (acc |> TreeN DTC.Iref{term, count} lines) rs ns goWords :: Words -> Refs -> Words -> Words -> Maybe (Ref, Words, Words, Refs) goWords ws m@(TreeMap refsByWord) prev inp = case inp of [] -> Nothing curr@Space : next -> goWords (curr:ws) m (curr:prev) next curr@(Word w) : next -> let words = curr:ws in case Map.lookup w refsByWord of Nothing -> Nothing Just nod@TreeMap.Node{..} -> case node_value of Strict.Nothing | null node_descendants -> Nothing | otherwise -> (<$> goWords words node_descendants (curr:prev) next) $ \(ref, ls, ns, rs) -> (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord) Strict.Just refs -> case goWords words node_descendants (curr:prev) next of Nothing -> let term = List.reverse words in let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in let ref = Ref{term, count, section} in Just (ref, curr:prev, next, TreeMap $ Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord) Just (ref, ls, ns, rs) -> Just (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord) wordify :: Text -> Words wordify = List.reverse . go [] where go :: Words -> Text -> Words go acc t = case Text.span Char.isAlphaNum t of ("",_) -> case Text.span Char.isSpace t of ("",_) -> case Text.uncons t of Nothing -> acc Just (c,r) -> go (Word (Text.singleton c) : acc) r (_s,r) -> go (Space : acc) r (w,r) -> go (Word w : acc) r plainifyWord :: WordOrSpace -> Text plainifyWord = \case Word w -> w Space -> " " plainifyWords :: Words -> Text plainifyWords = Text.concat . (plainifyWord <$>)