1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 module Hdoc.DTC.Analyze.Index where
5 import Control.Applicative (Applicative(..))
6 import Control.Monad (Monad(..))
8 import Data.Char (Char)
9 import Data.Default.Class (Default(..))
10 import Data.Foldable (Foldable(..), concat)
11 import Data.Function (($), (.), const)
12 import Data.Functor ((<$>), (<$))
13 import Data.Functor.Compose (Compose(..))
14 import Data.Map.Strict (Map)
15 import Data.Maybe (Maybe(..), maybe)
16 import Data.Monoid (Monoid(..))
17 import Data.Semigroup (Semigroup(..))
18 import Data.Sequence ((|>))
19 import Data.Traversable (Traversable(..))
20 import Data.TreeMap.Strict (TreeMap(..))
21 import Data.TreeSeq.Strict (Tree(..), tree0)
22 import qualified Data.HashMap.Strict as HM
23 -- import qualified Control.Monad.Trans.RWS.Strict as RWS
24 import qualified Control.Monad.Trans.Reader as R
25 import qualified Control.Monad.Trans.State as S
26 import qualified Data.Char as Char
27 import qualified Data.List as List
28 import qualified Data.Map.Strict as Map
29 import qualified Data.Sequence as Seq
30 import qualified Data.Strict.Maybe as Strict
31 import qualified Data.Text.Lazy as TL
32 import qualified Data.TreeMap.Strict as TM
33 import qualified Data.TreeSeq.Strict as TS
35 import Hdoc.DTC.Document as DTC
36 import qualified Hdoc.XML as XML
41 { index_terms :: TM.TreeMap Word (Seq (Either Head Section))
42 , index_tag :: HS.HashSet Ident
43 , index_at :: HS.HashSet Ident
45 instance Default Index where
54 data Indexable = Indexable
55 { indexable_terms :: Terms
56 , indexable_tags :: HS.HashSet Ident
60 -- | Build an 'Index' a whole 'Document', returning a mangled version of it
61 -- where 'PlainIref's are inserted as required by the given 'Terms'.
62 indexifyDocument :: Terms -> Document -> (Document, Index)
63 indexifyDocument terms doc =
64 let index = indexOfTerms terms in
65 if null terms then (doc, def)
67 (`S.runState` index) $
72 indexOfTerms :: Terms -> Index
73 indexOfTerms = TM.fromList const . (>>= f) . concat
76 f ws = maybe [] (\p -> [(p, Seq.empty)]) $ pathFromWords ws
79 newtype Reader = Reader
80 { reader_section :: Section
82 instance Default Reader where
84 { reader_section = def
91 class Indexify a where
92 indexify :: a -> Compose (R.Reader Reader) (S.State State) a
93 instance Indexify Document where
94 indexify Document{..} =
96 <$> traverse indexify document_head
97 <*> traverse indexify document_body
98 instance Indexify Head where
101 <$> indexify head_section
102 <*> traverse indexify head_body
103 instance Indexify (Tree BodyNode) where
104 indexify (Tree n ts) =
109 <*> traverse indexify ts
110 BodySection section@Section{..} ->
111 Compose $ R.local (\ro -> ro{reader_section = section}) $
115 <*> traverse indexify ts
116 instance Indexify Section where
117 indexify Section{..} =
118 Section section_posXML section_locTCT section_attrs
119 <$> indexify section_about
120 instance Indexify Block where
121 indexify b = case b of
122 BlockPara p -> BlockPara <$> indexify p
123 BlockBreak{} -> pure b
126 BlockAside{..} -> BlockAside posXML attrs <$> traverse indexify blocks
127 BlockIndex{..} -> pure b
129 BlockFigure posXML type_ attrs
130 <$> traverse indexify mayTitle
131 <*> traverse indexify paras
132 BlockReferences{..} ->
133 BlockReferences posXML attrs
134 <$> traverse indexify refs
135 BlockJudges js -> BlockJudges <$> indexify js
137 BlockGrades posXML attrs
139 instance Indexify Para where
141 ParaItem{..} -> ParaItem <$> indexify item
142 ParaItems{..} -> ParaItems posXML attrs <$> traverse indexify items
143 instance Indexify ParaItem where
145 ParaPlain plain -> ParaPlain <$> indexify plain
146 ParaOL items -> ParaOL <$> traverse indexify items
147 ParaUL items -> ParaUL <$> traverse (traverse indexify) items
148 ParaQuote{..} -> ParaQuote type_ <$> traverse indexify paras
149 p@ParaArtwork{} -> pure p
150 p@ParaComment{} -> pure p
151 ParaJudgment j -> ParaJudgment <$> indexify j
152 instance Indexify ListItem where
153 indexify ListItem{..} = ListItem name <$> traverse indexify paras
154 instance Indexify Plain where
155 indexify = traverse indexify
156 instance Indexify (Tree PlainNode) where
157 indexify (Tree n ts) =
160 case pathFromWords iref_term of
161 Nothing -> Tree n <$> traverse indexify ts
163 Compose $ R.ask >>= \ro -> getCompose $
165 <$ Compose (pure (S.modify' $ \state_irefsByWords ->
166 TM.insert (<>) words (pure $ reader_section ro) $
169 <*> traverse indexify ts
171 -- TODO: introduce PlainGroup only when necessary?
172 Tree PlainGroup <$> indexifyWords (wordify txt)
173 _ -> Tree n <$> traverse indexify ts
174 instance Indexify Title where
175 indexify (Title p) = Title <$> indexify p
176 instance Indexify About where
179 <$> traverse indexify about_titles
180 <*> pure about_aliases
181 <*> traverse indexify about_authors
182 <*> traverse indexify about_dates
185 <*> pure about_series
186 <*> traverse indexify about_description
187 <*> traverse indexify about_judgments
188 instance Indexify Entity where
189 indexify = pure -- TODO: to be coded
190 instance Indexify Date where
191 indexify = pure -- TODO: to be coded
192 instance Indexify Include where
193 indexify = pure -- TODO: to be coded
194 instance Indexify Reference where
195 indexify Reference{..} = do
200 <$> indexify reference_about
201 instance Indexify Judges where
202 indexify Judges{..} =
207 <$> traverse (traverse indexify) judges_byName
208 instance Indexify [Grade] where
209 indexify = traverse indexify
210 instance Indexify Judgment where
211 indexify Judgment{..} =
213 judgment_opinionsByChoice
222 <$> traverse indexify judgment_question
223 <*> traverse indexify judgment_choices
224 instance Indexify Choice where
225 indexify Choice{..} =
226 Choice choice_locTCT choice_posXML
227 <$> traverse indexify choice_title
228 <*> traverse indexify choice_opinions
229 instance Indexify Opinion where
230 indexify Opinion{..} =
238 <$> traverse indexify opinion_comment
239 instance Indexify Grade where
246 <$> traverse indexify grade_title
247 instance Indexify Judge where
253 <$> traverse indexify judge_title
254 <*> pure judge_defaultGrades
257 wordify :: TL.Text -> Words
258 wordify = List.reverse . go []
260 go :: Words -> TL.Text -> Words
262 case TL.span Char.isAlphaNum t of
264 case TL.span Char.isSpace t of
268 Just (c,r) -> go (Word (TL.singleton c) : acc) r
269 (_s,r) -> go (Space : acc) r
270 (w,r) -> go (Word w : acc) r
272 plainifyWord :: WordOrSpace -> TL.Text
277 plainifyWords :: Words -> TL.Text
278 plainifyWords = TL.concat . (plainifyWord <$>)
280 indexifyWords :: Words -> Compose (R.Reader Reader) (S.State Index) Plain
282 Compose $ R.ask >>= \case
283 ro -> pure $ go mempty ws
285 go :: Plain -> Words -> S.State Index Plain
290 go (acc |> tree0 (PlainText " ")) next
292 goWords [] inp >>= \case
293 Nothing -> go (acc |> tree0 (PlainText w)) next
295 let iref_term = List.reverse ls
296 let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> iref_term
297 go (acc |> Tree PlainIref{iref_term} lines) ns
300 S.State Index (Maybe (Words, Words))
301 goWords prev inp = do
302 TM.TreeMap irefsByWord <- S.get
305 curr@Space : next -> goWords (curr:prev) next
306 curr@(Word w) : next ->
307 case Map.lookup w irefsByWord of
308 Nothing -> return Nothing
309 Just nod@TM.Node{..} -> do
310 let prev' = curr:prev
311 S.put node_descendants
314 | null node_descendants -> return Nothing
315 | otherwise -> goWords prev' next
316 <* S.modify' (\rs -> TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord)
318 goWords prev' next >>= \case
322 nod{TM.node_value = Strict.Just $ reader_section ro Seq.<|irefs}
324 return $ Just (prev', next)
327 TM.TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord
330 termsByChar :: Terms -> Map Char Terms
332 foldr (\aliases acc ->
334 (Word w:_):_ | not (TL.null w) ->
336 (Char.toUpper $ TL.index w 0)
341 -- * Type 'IndexByPos'
342 type IndexByPos = HM.HashMap XML.Pos Terms
344 -- ** Class 'CollectIndex'
345 class CollectIndex a where
346 collectIndex :: a -> IndexByPos
347 instance CollectIndex Document where
348 collectIndex Document{..} =
349 collectIndex document_body
350 instance CollectIndex (TS.Trees BodyNode) where
351 collectIndex = foldMap $ \(TS.Tree b bs) ->
353 BodyBlock blk -> collectIndex blk
354 BodySection _ -> collectIndex bs
355 instance CollectIndex Block where
357 BlockIndex{..} -> HM.singleton posXML index