{-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Compute an Index for a DTC. module Language.DTC.Index where import Control.Applicative (Applicative(..)) import Control.Monad (Monad(..), mapM, forM) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Foldable (Foldable(..), concat) import Data.Function (($), (.)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.Map.Strict (Map) import Data.Maybe (Maybe(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq) import Data.Set (Set) import Data.Text (Text) import Data.TreeSeq.Strict (Tree(..)) import Prelude (Num(..)) import Text.Show (Show(..)) import qualified Control.Monad.Trans.State as S import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Data.Text as Text import qualified Language.DTC.Document as DTC import Language.XML (XmlPos(..)) -- * Type 'Term' type Term = Text terms :: [Term] -> Set Term terms = Set.fromList . foldMap ((Text.strip <$>) . Text.lines) -- ** Type 'Term' type Aliases = [Term] aliasesByChar :: [Aliases] -> Map Char [Aliases] aliasesByChar = -- TODO: case insensitivity? foldr (\ts acc -> Map.insertWith (<>) (Char.toUpper $ List.head ts `Text.index` 0) [ts] acc ) Map.empty -- * Type 'Count' type Count = Int -- * Type 'Ref' data Ref = Ref { term :: Term , count :: Count , section :: XmlPos } deriving (Show) -- * Type 'State' data State = State { state_terms :: Map Term [Ref] , state_text :: Bool , state_section :: XmlPos } state :: State state = State { state_terms = Map.empty , state_text = True , state_section = def } -- * Class 'Indexify' class Indexify a where indexify :: a -> S.State State a instance Indexify (Tree k a) => Indexify [Tree k a] where indexify = mapM indexify instance Indexify a => Indexify (Tree DTC.BodyKey a) where indexify = \case Tree0 v -> Tree0 <$> indexify v TreeN k v -> case k of DTC.Section{..} -> do before@State{state_section} <- S.get S.put before{state_section = pos} t <- TreeN <$> indexify k <*> indexify v after <- S.get S.put after{state_section} return t {- instance Indexify a => Indexify [a] where indexify = mapM indexify -} instance Indexify a => Indexify (Seq a) where indexify = mapM indexify instance Indexify DTC.BodyKey where indexify = \case DTC.Section{..} -> DTC.Section pos attrs <$> indexify title <*> pure aliases instance Indexify DTC.BodyValue where indexify = \case d@DTC.ToC{} -> pure d d@DTC.ToF{} -> pure d d@DTC.Index{} -> pure d DTC.Figure{..} -> DTC.Figure pos attrs type_ <$> indexify title <*> indexify verts DTC.Vertical v -> DTC.Vertical <$> indexify v instance Indexify [DTC.Reference] where indexify = mapM indexify instance Indexify [DTC.Vertical] where indexify = mapM indexify instance Indexify [[DTC.Vertical]] where indexify = mapM (mapM indexify) instance Indexify DTC.Title where indexify (DTC.Title t) = DTC.Title <$> indexify t instance Indexify DTC.Vertical where indexify = \case DTC.Para{..} -> DTC.Para pos attrs <$> indexify horis DTC.OL{..} -> DTC.OL pos attrs <$> indexify items DTC.UL{..} -> DTC.UL pos attrs <$> indexify items DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art d@DTC.Comment{} -> pure d instance Indexify [DTC.Horizontal] where indexify hs = (concat <$>) $ forM hs $ \case d@DTC.BR -> return [d] DTC.B s -> pure . DTC.B <$> indexify s DTC.Code s -> pure . DTC.Code <$> indexify s DTC.Del s -> pure . DTC.Del <$> indexify s DTC.I s -> pure . DTC.I <$> indexify s DTC.Note s -> pure . DTC.Note <$> indexify s DTC.Q s -> pure . DTC.Q <$> indexify s DTC.SC s -> pure . DTC.SC <$> indexify s DTC.Sub s -> pure . DTC.Sub <$> indexify s DTC.Sup s -> pure . DTC.Sup <$> indexify s DTC.U s -> pure . DTC.U <$> indexify s DTC.Eref{..} -> pure . DTC.Eref href <$> indexify text DTC.Iref{..} -> pure . DTC.Iref count term <$> indexify text DTC.Ref{..} -> pure . DTC.Ref to <$> indexify text DTC.Rref{..} -> pure . DTC.Rref to <$> indexify text DTC.Plain p -> List.reverse <$> go 0 p p [] where flt c = Char.isAlphaNum c || Char.isPunctuation c go :: Int -> Text -> Text -> [DTC.Horizontal] -> S.State State [DTC.Horizontal] go len plain curr acc = -- NOTE: keep the number of DTC.Plain to the minimum -- while sharing their Text with the original. case Text.span flt curr of ("","") | len > 0 -> return $ DTC.Plain plain:acc | otherwise -> return acc ("",r) -> case Text.break flt r of (m,n) -> go (len + Text.length m) plain n acc (term,r) -> do st@State{..} <- S.get case Map.updateLookupWithKey (\_w refs -> let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in Just $ Ref{term, count, section=state_section} : refs) term state_terms of (Nothing, _) -> go (len + Text.length term) plain r acc (Just refs, m) -> do S.put st{state_terms=m} go 0 r r $ DTC.Iref { term , count = count $ List.head refs , text = [DTC.Plain term] } : if len > 0 then DTC.Plain (Text.take len plain) : acc else acc instance Indexify DTC.Reference where indexify = return instance Indexify DTC.Artwork where indexify = return {- countIref :: Seq XmlPos -> Int countIref s = case Seq.viewr s of _ :> XmlPos{xmlPosAncestors=(_,c):_} -> c + 1 _ -> 0 data Iref = Iref { iref_pos :: Seq.XmlPos , iref_ } -- Map Text DTC.Horizontal -}