]> Git — Sourcephile - doclang.git/blob - Hdoc/DTC/Analyze/Index.hs
Fix Index.
[doclang.git] / Hdoc / DTC / Analyze / Index.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 module Hdoc.DTC.Analyze.Index where
4
5 -- import Data.Eq (Eq(..))
6 -- import Text.Show (Show(..))
7 -- import qualified Control.Monad.Trans.Writer as W
8 -- import qualified Data.HashMap.Strict as HM
9 -- import qualified Data.Tree as Tree
10 -- import qualified Data.TreeSeq.Strict as TS
11 -- import qualified Hjugement as MJ
12 import Control.Applicative (Applicative(..))
13 import Control.Monad (Monad(..))
14 import Data.Bool
15 import Data.Char (Char)
16 import Data.Default.Class (Default(..))
17 import Data.Either (Either(..))
18 import Data.Foldable (Foldable(..), concat)
19 import Data.Function (($), (.), const)
20 import Data.Functor ((<$>), (<$))
21 import Data.Functor.Compose (Compose(..))
22 import Data.Map.Strict (Map)
23 import Data.Maybe (Maybe(..), maybe)
24 import Data.Monoid (Monoid(..))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (Seq, (|>))
27 import Data.Traversable (Traversable(..))
28 import Data.TreeMap.Strict (TreeMap(..))
29 import Data.TreeSeq.Strict (Tree(..), tree0)
30 import qualified Data.HashMap.Strict as HM
31 -- import qualified Control.Monad.Trans.RWS.Strict as RWS
32 import qualified Control.Monad.Trans.Reader as R
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Sequence as Seq
38 import qualified Data.Strict.Maybe as Strict
39 import qualified Data.Text.Lazy as TL
40 import qualified Data.TreeMap.Strict as TM
41 import qualified Data.TreeSeq.Strict as TS
42
43 import Hdoc.DTC.Document as DTC
44 -- import Hdoc.TCT.Cell as TCT
45 import qualified Hdoc.XML as XML
46
47 -- * Type 'Index'
48 type Index = TM.TreeMap Word (Seq (Either Head Section))
49
50 -- | Build an 'Index' a whole 'Document', returning a mangled version of it
51 -- where 'PlainIref's are inserted as required by the given 'Terms'.
52 indexifyDocument :: Terms -> Document -> (Document, Index)
53 indexifyDocument terms doc =
54 let index = indexOfTerms terms in
55 if null terms then (doc, def)
56 else
57 (`S.runState` index) $
58 (`R.runReader` def) $
59 getCompose $
60 indexify doc
61
62 indexOfTerms :: Terms -> Index
63 indexOfTerms = TM.fromList const . (>>= f) . concat
64 where
65 f [] = []
66 f ws = maybe [] (\p -> [(p, Seq.empty)]) $ pathFromWords ws
67
68 -- * Type 'Reader'
69 newtype Reader = Reader
70 { reader_section :: Either Head Section
71 }
72 instance Default Reader where
73 def = Reader
74 { reader_section = Left def
75 }
76
77 -- * Type 'State'
78 type State = Index
79
80 -- * Class 'Indexify'
81 class Indexify a where
82 indexify :: a -> Compose (R.Reader Reader) (S.State State) a
83 instance Indexify Document where
84 indexify Document{..} =
85 Compose $ R.local (\ro -> ro{reader_section = Left head}) $
86 getCompose $
87 Document
88 <$> indexify head
89 <*> traverse indexify body
90 instance Indexify Head where
91 indexify h@Head{..} = pure h
92 instance Indexify (Tree BodyNode) where
93 indexify (Tree n ts) =
94 case n of
95 BodyBlock b ->
96 Tree . BodyBlock
97 <$> indexify b
98 <*> traverse indexify ts
99 BodySection section@Section{..} ->
100 Compose $ R.local (\ro -> ro{reader_section = Right section}) $
101 getCompose $
102 Tree . BodySection
103 <$> indexify section
104 <*> traverse indexify ts
105 instance Indexify Section where
106 indexify Section{..} =
107 Section section_posXML section_attrs
108 <$> indexify section_title
109 <*> pure section_aliases
110 <*> traverse indexify section_judgments
111 instance Indexify Block where
112 indexify b = case b of
113 BlockPara p -> BlockPara <$> indexify p
114 BlockBreak{} -> pure b
115 BlockToC{} -> pure b
116 BlockToF{} -> pure b
117 BlockAside{..} -> BlockAside posXML attrs <$> traverse indexify blocks
118 BlockIndex{..} -> pure b
119 BlockFigure{..} ->
120 BlockFigure posXML type_ attrs
121 <$> traverse indexify mayTitle
122 <*> traverse indexify paras
123 BlockReferences{..} ->
124 BlockReferences posXML attrs
125 <$> traverse indexify refs
126 BlockJudges js -> BlockJudges <$> indexify js
127 BlockGrades{..} ->
128 BlockGrades posXML attrs
129 <$> indexify scale
130 instance Indexify Para where
131 indexify = \case
132 ParaItem{..} -> ParaItem <$> indexify item
133 ParaItems{..} -> ParaItems posXML attrs <$> traverse indexify items
134 instance Indexify ParaItem where
135 indexify = \case
136 ParaPlain plain -> ParaPlain <$> indexify plain
137 ParaOL items -> ParaOL <$> traverse indexify items
138 ParaUL items -> ParaUL <$> traverse (traverse indexify) items
139 ParaQuote{..} -> ParaQuote type_ <$> traverse indexify paras
140 p@ParaArtwork{} -> pure p
141 p@ParaComment{} -> pure p
142 ParaJudgment j -> ParaJudgment <$> indexify j
143 instance Indexify ListItem where
144 indexify ListItem{..} = ListItem name <$> traverse indexify paras
145 instance Indexify Plain where
146 indexify = traverse indexify
147 instance Indexify (Tree PlainNode) where
148 indexify (Tree n ts) =
149 case n of
150 PlainIref{..} ->
151 case pathFromWords iref_term of
152 Nothing -> Tree n <$> traverse indexify ts
153 Just words ->
154 Compose $ R.ask >>= \ro -> getCompose $
155 Tree n
156 <$ Compose (pure (S.modify' $ \state_irefsByWords ->
157 TM.insert (<>) words (pure $ reader_section ro) $
158 state_irefsByWords
159 ))
160 <*> traverse indexify ts
161 PlainText txt ->
162 -- TODO: introduce PlainGroup only when necessary?
163 Tree PlainGroup <$> indexifyWords (wordify txt)
164 _ -> Tree n <$> traverse indexify ts
165 instance Indexify Title where
166 indexify (Title p) = Title <$> indexify p
167 instance Indexify About where
168 indexify About{..} =
169 About
170 <$> traverse indexify about_titles
171 <*> pure about_url
172 <*> traverse indexify about_authors
173 <*> traverse indexify about_editor
174 <*> traverse indexify about_date
175 <*> pure about_tags
176 <*> pure about_links
177 <*> pure about_series
178 <*> traverse indexify about_description
179 <*> pure about_headers
180 instance Indexify Entity where
181 indexify = pure -- TODO: to be coded
182 instance Indexify Date where
183 indexify = pure -- TODO: to be coded
184 instance Indexify Include where
185 indexify = pure -- TODO: to be coded
186 instance Indexify Reference where
187 indexify Reference{..} = do
188 Reference
189 reference_posXML
190 reference_locTCT
191 reference_id
192 <$> indexify reference_about
193 {-
194 st@State{state_collect=All{..}, ..} <- S.get
195 let targets = HM.lookupDefault Seq.empty reference_id all_reference
196 case toList targets of
197 [] -> undefined
198 [_] -> do
199 _ -> do
200 let err =
201 HM.insertWith (flip (<>)) reference_id (pure reference_locTCT) $
202 errors_reference_ambiguous state_errors
203 S.put st
204 { state_errors = state_errors
205 { errors_reference_ambiguous = err }
206 }
207 about <- indexify reference_about
208 return $ Reference
209 { reference_error = Just $ ErrorAnchor_Ambiguous $ Nat1 $ length $ err HM.!reference_id
210 , reference_about = about
211 , .. }
212 -}
213 instance Indexify Judges where
214 indexify Judges{..} =
215 Judges
216 judges_locTCT
217 judges_posXML
218 judges_attrs
219 <$> traverse (traverse indexify) judges_byName
220 instance Indexify [Grade] where
221 indexify = traverse indexify
222 instance Indexify Judgment where
223 indexify Judgment{..} =
224 Judgment
225 judgment_opinionsByChoice
226 judgment_judges
227 judgment_grades
228 judgment_posXML
229 judgment_locTCT
230 judgment_judgesId
231 judgment_gradesId
232 judgment_importance
233 <$> traverse indexify judgment_question
234 <*> traverse indexify judgment_choices
235 instance Indexify Choice where
236 indexify Choice{..} =
237 Choice choice_locTCT choice_posXML
238 <$> traverse indexify choice_title
239 <*> traverse indexify choice_opinions
240 instance Indexify Opinion where
241 indexify Opinion{..} =
242 Opinion
243 opinion_locTCT
244 opinion_posXML
245 opinion_judge
246 opinion_grade
247 opinion_importance
248 <$> traverse indexify opinion_comment
249 instance Indexify Grade where
250 indexify Grade{..} =
251 Grade
252 grade_posXML
253 grade_name
254 grade_color
255 grade_isDefault
256 <$> traverse indexify grade_title
257 instance Indexify Judge where
258 indexify Judge{..} =
259 Judge
260 judge_locTCT
261 judge_posXML
262 judge_name
263 <$> traverse indexify judge_title
264 <*> pure judge_defaultGrades
265
266 -- * Type 'Words'
267 wordify :: TL.Text -> Words
268 wordify = List.reverse . go []
269 where
270 go :: Words -> TL.Text -> Words
271 go acc t =
272 case TL.span Char.isAlphaNum t of
273 ("",_) ->
274 case TL.span Char.isSpace t of
275 ("",_) ->
276 case TL.uncons t of
277 Nothing -> acc
278 Just (c,r) -> go (Word (TL.singleton c) : acc) r
279 (_s,r) -> go (Space : acc) r
280 (w,r) -> go (Word w : acc) r
281
282 plainifyWord :: WordOrSpace -> TL.Text
283 plainifyWord = \case
284 Word w -> w
285 Space -> " "
286
287 plainifyWords :: Words -> TL.Text
288 plainifyWords = TL.concat . (plainifyWord <$>)
289
290 indexifyWords :: Words -> Compose (R.Reader Reader) (S.State Index) Plain
291 indexifyWords ws =
292 Compose $ R.ask >>= \case
293 ro -> pure $ go mempty ws
294 where
295 go :: Plain -> Words -> S.State Index Plain
296 go acc inp =
297 case inp of
298 [] -> return acc
299 Space : next ->
300 go (acc |> tree0 (PlainText " ")) next
301 Word w : next ->
302 goWords [] inp >>= \case
303 Nothing -> go (acc |> tree0 (PlainText w)) next
304 Just (ls, ns) -> do
305 let iref_term = List.reverse ls
306 let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> iref_term
307 go (acc |> Tree PlainIref{iref_term} lines) ns
308 goWords ::
309 Words -> Words ->
310 S.State Index (Maybe (Words, Words))
311 goWords prev inp = do
312 TM.TreeMap irefsByWord <- S.get
313 case inp of
314 [] -> return Nothing
315 curr@Space : next -> goWords (curr:prev) next
316 curr@(Word w) : next ->
317 case Map.lookup w irefsByWord of
318 Nothing -> return Nothing
319 Just nod@TM.Node{..} -> do
320 let prev' = curr:prev
321 S.put node_descendants
322 case node_value of
323 Strict.Nothing
324 | null node_descendants -> return Nothing
325 | otherwise -> goWords prev' next
326 <* S.modify' (\rs -> TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord)
327 Strict.Just irefs ->
328 goWords prev' next >>= \case
329 Nothing -> do
330 S.put $ TM.TreeMap $
331 Map.insert w
332 nod{TM.node_value = Strict.Just $ reader_section ro Seq.<|irefs}
333 irefsByWord
334 return $ Just (prev', next)
335 r@Just{} -> do
336 S.modify' $ \rs ->
337 TM.TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord
338 return r
339
340 termsByChar :: Terms -> Map Char Terms
341 termsByChar =
342 foldr (\aliases acc ->
343 case aliases of
344 (Word w:_):_ | not (TL.null w) ->
345 Map.insertWith (<>)
346 (Char.toUpper $ TL.index w 0)
347 [aliases] acc
348 _ -> acc
349 ) Map.empty
350
351 -- * Type 'TermsByPos'
352 type TermsByPos = HM.HashMap XML.Pos Terms
353
354 -- ** Class 'CollectIndex'
355 class CollectIndex a where
356 collectIndex :: a -> TermsByPos
357 instance CollectIndex Document where
358 collectIndex Document{..} = collectIndex body
359 instance CollectIndex (TS.Trees BodyNode) where
360 collectIndex = foldMap $ \(TS.Tree b bs) ->
361 case b of
362 BodyBlock blk -> collectIndex blk
363 BodySection _ -> collectIndex bs
364 instance CollectIndex Block where
365 collectIndex = \case
366 BlockIndex{..} -> HM.singleton posXML terms
367 _ -> def