1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 module Hdoc.DTC.Analyze.Index where
5 -- import Data.Eq (Eq(..))
6 -- import Text.Show (Show(..))
7 -- import qualified Control.Monad.Trans.Writer as W
8 -- import qualified Data.HashMap.Strict as HM
9 -- import qualified Data.Tree as Tree
10 -- import qualified Data.TreeSeq.Strict as TS
11 -- import qualified Hjugement as MJ
12 import Control.Applicative (Applicative(..))
13 import Control.Monad (Monad(..))
15 import Data.Char (Char)
16 import Data.Default.Class (Default(..))
17 import Data.Either (Either(..))
18 import Data.Foldable (Foldable(..), concat)
19 import Data.Function (($), (.), const)
20 import Data.Functor ((<$>), (<$))
21 import Data.Functor.Compose (Compose(..))
22 import Data.Map.Strict (Map)
23 import Data.Maybe (Maybe(..), maybe)
24 import Data.Monoid (Monoid(..))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (Seq, (|>))
27 import Data.Traversable (Traversable(..))
28 import Data.TreeMap.Strict (TreeMap(..))
29 import Data.TreeSeq.Strict (Tree(..), tree0)
30 import qualified Data.HashMap.Strict as HM
31 -- import qualified Control.Monad.Trans.RWS.Strict as RWS
32 import qualified Control.Monad.Trans.Reader as R
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Sequence as Seq
38 import qualified Data.Strict.Maybe as Strict
39 import qualified Data.Text.Lazy as TL
40 import qualified Data.TreeMap.Strict as TM
41 import qualified Data.TreeSeq.Strict as TS
43 import Hdoc.DTC.Document as DTC
44 -- import Hdoc.TCT.Cell as TCT
45 import qualified Hdoc.XML as XML
48 type Index = TM.TreeMap Word (Seq (Either Head Section))
50 -- | Build an 'Index' a whole 'Document', returning a mangled version of it
51 -- where 'PlainIref's are inserted as required by the given 'Terms'.
52 indexifyDocument :: Terms -> Document -> (Document, Index)
53 indexifyDocument terms doc =
54 let index = indexOfTerms terms in
55 if null terms then (doc, def)
57 (`S.runState` index) $
62 indexOfTerms :: Terms -> Index
63 indexOfTerms = TM.fromList const . (>>= f) . concat
66 f ws = maybe [] (\p -> [(p, Seq.empty)]) $ pathFromWords ws
69 newtype Reader = Reader
70 { reader_section :: Either Head Section
72 instance Default Reader where
74 { reader_section = Left def
81 class Indexify a where
82 indexify :: a -> Compose (R.Reader Reader) (S.State State) a
83 instance Indexify Document where
84 indexify Document{..} =
85 Compose $ R.local (\ro -> ro{reader_section = Left head}) $
89 <*> traverse indexify body
90 instance Indexify Head where
91 indexify h@Head{..} = pure h
92 instance Indexify (Tree BodyNode) where
93 indexify (Tree n ts) =
98 <*> traverse indexify ts
99 BodySection section@Section{..} ->
100 Compose $ R.local (\ro -> ro{reader_section = Right section}) $
104 <*> traverse indexify ts
105 instance Indexify Section where
106 indexify Section{..} =
107 Section section_posXML section_attrs
108 <$> indexify section_title
109 <*> pure section_aliases
110 <*> traverse indexify section_judgments
111 instance Indexify Block where
112 indexify b = case b of
113 BlockPara p -> BlockPara <$> indexify p
114 BlockBreak{} -> pure b
117 BlockAside{..} -> BlockAside posXML attrs <$> traverse indexify blocks
118 BlockIndex{..} -> pure b
120 BlockFigure posXML type_ attrs
121 <$> traverse indexify mayTitle
122 <*> traverse indexify paras
123 BlockReferences{..} ->
124 BlockReferences posXML attrs
125 <$> traverse indexify refs
126 BlockJudges js -> BlockJudges <$> indexify js
128 BlockGrades posXML attrs
130 instance Indexify Para where
132 ParaItem{..} -> ParaItem <$> indexify item
133 ParaItems{..} -> ParaItems posXML attrs <$> traverse indexify items
134 instance Indexify ParaItem where
136 ParaPlain plain -> ParaPlain <$> indexify plain
137 ParaOL items -> ParaOL <$> traverse indexify items
138 ParaUL items -> ParaUL <$> traverse (traverse indexify) items
139 ParaQuote{..} -> ParaQuote type_ <$> traverse indexify paras
140 p@ParaArtwork{} -> pure p
141 p@ParaComment{} -> pure p
142 ParaJudgment j -> ParaJudgment <$> indexify j
143 instance Indexify ListItem where
144 indexify ListItem{..} = ListItem name <$> traverse indexify paras
145 instance Indexify Plain where
146 indexify = traverse indexify
147 instance Indexify (Tree PlainNode) where
148 indexify (Tree n ts) =
151 case pathFromWords iref_term of
152 Nothing -> Tree n <$> traverse indexify ts
154 Compose $ R.ask >>= \ro -> getCompose $
156 <$ Compose (pure (S.modify' $ \state_irefsByWords ->
157 TM.insert (<>) words (pure $ reader_section ro) $
160 <*> traverse indexify ts
162 -- TODO: introduce PlainGroup only when necessary?
163 Tree PlainGroup <$> indexifyWords (wordify txt)
164 _ -> Tree n <$> traverse indexify ts
165 instance Indexify Title where
166 indexify (Title p) = Title <$> indexify p
167 instance Indexify About where
170 <$> traverse indexify about_titles
172 <*> traverse indexify about_authors
173 <*> traverse indexify about_editor
174 <*> traverse indexify about_date
177 <*> pure about_series
178 <*> traverse indexify about_description
179 <*> pure about_headers
180 instance Indexify Entity where
181 indexify = pure -- TODO: to be coded
182 instance Indexify Date where
183 indexify = pure -- TODO: to be coded
184 instance Indexify Include where
185 indexify = pure -- TODO: to be coded
186 instance Indexify Reference where
187 indexify Reference{..} = do
192 <$> indexify reference_about
194 st@State{state_collect=All{..}, ..} <- S.get
195 let targets = HM.lookupDefault Seq.empty reference_id all_reference
196 case toList targets of
201 HM.insertWith (flip (<>)) reference_id (pure reference_locTCT) $
202 errors_reference_ambiguous state_errors
204 { state_errors = state_errors
205 { errors_reference_ambiguous = err }
207 about <- indexify reference_about
209 { reference_error = Just $ ErrorAnchor_Ambiguous $ Nat1 $ length $ err HM.!reference_id
210 , reference_about = about
213 instance Indexify Judges where
214 indexify Judges{..} =
219 <$> traverse (traverse indexify) judges_byName
220 instance Indexify [Grade] where
221 indexify = traverse indexify
222 instance Indexify Judgment where
223 indexify Judgment{..} =
225 judgment_opinionsByChoice
233 <$> traverse indexify judgment_question
234 <*> traverse indexify judgment_choices
235 instance Indexify Choice where
236 indexify Choice{..} =
237 Choice choice_locTCT choice_posXML
238 <$> traverse indexify choice_title
239 <*> traverse indexify choice_opinions
240 instance Indexify Opinion where
241 indexify Opinion{..} =
248 <$> traverse indexify opinion_comment
249 instance Indexify Grade where
256 <$> traverse indexify grade_title
257 instance Indexify Judge where
263 <$> traverse indexify judge_title
264 <*> pure judge_defaultGrades
267 wordify :: TL.Text -> Words
268 wordify = List.reverse . go []
270 go :: Words -> TL.Text -> Words
272 case TL.span Char.isAlphaNum t of
274 case TL.span Char.isSpace t of
278 Just (c,r) -> go (Word (TL.singleton c) : acc) r
279 (_s,r) -> go (Space : acc) r
280 (w,r) -> go (Word w : acc) r
282 plainifyWord :: WordOrSpace -> TL.Text
287 plainifyWords :: Words -> TL.Text
288 plainifyWords = TL.concat . (plainifyWord <$>)
290 indexifyWords :: Words -> Compose (R.Reader Reader) (S.State Index) Plain
292 Compose $ R.ask >>= \case
293 ro -> pure $ go mempty ws
295 go :: Plain -> Words -> S.State Index Plain
300 go (acc |> tree0 (PlainText " ")) next
302 goWords [] inp >>= \case
303 Nothing -> go (acc |> tree0 (PlainText w)) next
305 let iref_term = List.reverse ls
306 let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> iref_term
307 go (acc |> Tree PlainIref{iref_term} lines) ns
310 S.State Index (Maybe (Words, Words))
311 goWords prev inp = do
312 TM.TreeMap irefsByWord <- S.get
315 curr@Space : next -> goWords (curr:prev) next
316 curr@(Word w) : next ->
317 case Map.lookup w irefsByWord of
318 Nothing -> return Nothing
319 Just nod@TM.Node{..} -> do
320 let prev' = curr:prev
321 S.put node_descendants
324 | null node_descendants -> return Nothing
325 | otherwise -> goWords prev' next
326 <* S.modify' (\rs -> TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord)
328 goWords prev' next >>= \case
332 nod{TM.node_value = Strict.Just $ reader_section ro Seq.<|irefs}
334 return $ Just (prev', next)
337 TM.TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord
340 termsByChar :: Terms -> Map Char Terms
342 foldr (\aliases acc ->
344 (Word w:_):_ | not (TL.null w) ->
346 (Char.toUpper $ TL.index w 0)
351 -- * Type 'TermsByPos'
352 type TermsByPos = HM.HashMap XML.Pos Terms
354 -- ** Class 'CollectIndex'
355 class CollectIndex a where
356 collectIndex :: a -> TermsByPos
357 instance CollectIndex Document where
358 collectIndex Document{..} = collectIndex body
359 instance CollectIndex (TS.Trees BodyNode) where
360 collectIndex = foldMap $ \(TS.Tree b bs) ->
362 BodyBlock blk -> collectIndex blk
363 BodySection _ -> collectIndex bs
364 instance CollectIndex Block where
366 BlockIndex{..} -> HM.singleton posXML terms