1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 module Hdoc.DTC.Analyze.Index where
5 -- import Data.Eq (Eq(..))
6 -- import Text.Show (Show(..))
7 -- import qualified Control.Monad.Trans.Writer as W
8 -- import qualified Data.HashMap.Strict as HM
9 -- import qualified Data.Tree as Tree
10 -- import qualified Data.TreeSeq.Strict as TS
11 -- import qualified Hjugement as MJ
12 import Control.Applicative (Applicative(..))
13 import Control.Monad (Monad(..))
15 import Data.Char (Char)
16 import Data.Default.Class (Default(..))
17 import Data.Either (Either(..))
18 import Data.Foldable (Foldable(..), concat)
19 import Data.Function (($), (.), const)
20 import Data.Functor ((<$>), (<$))
21 import Data.Functor.Compose (Compose(..))
22 import Data.Map.Strict (Map)
23 import Data.Maybe (Maybe(..), maybe)
24 import Data.Monoid (Monoid(..))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (Seq, (|>))
27 import Data.Traversable (Traversable(..))
28 import Data.TreeMap.Strict (TreeMap(..))
29 import Data.TreeSeq.Strict (Tree(..), tree0)
30 import qualified Control.Monad.Trans.Reader as R
31 import qualified Control.Monad.Trans.State as S
32 import qualified Data.Char as Char
33 import qualified Data.List as List
34 import qualified Data.Map.Strict as Map
35 import qualified Data.Sequence as Seq
36 import qualified Data.Strict.Maybe as Strict
37 import qualified Data.Text.Lazy as TL
38 import qualified Data.TreeMap.Strict as TM
40 import Hdoc.DTC.Document as DTC
41 -- import Hdoc.TCT.Cell as TCT
42 -- import qualified Hdoc.XML as XML
44 -- * Type 'IrefsByWords'
45 type IrefsByWords = TM.TreeMap Word (Seq (Either Head Section))
47 irefsOfTerms :: Terms -> IrefsByWords
48 irefsOfTerms = TM.fromList const . (>>= f) . concat
51 f ws = maybe [] (\p -> [(p,Seq.empty)]) $ pathFromWords ws
53 wordify :: TL.Text -> Words
54 wordify = List.reverse . go []
56 go :: Words -> TL.Text -> Words
58 case TL.span Char.isAlphaNum t of
60 case TL.span Char.isSpace t of
64 Just (c,r) -> go (Word (TL.singleton c) : acc) r
65 (_s,r) -> go (Space : acc) r
66 (w,r) -> go (Word w : acc) r
68 plainifyWord :: WordOrSpace -> TL.Text
73 plainifyWords :: Words -> TL.Text
74 plainifyWords = TL.concat . (plainifyWord <$>)
76 termsByChar :: Terms -> Map Char Terms
78 foldr (\aliases acc ->
80 (Word w:_):_ | not (TL.null w) ->
82 (Char.toUpper $ TL.index w 0)
89 { reader_section :: Either Head Section
91 instance Default Reader where
93 { reader_section = Left def
97 type State = IrefsByWords
100 class Indexify a where
101 indexify :: a -> Compose (R.Reader Reader) (S.State State) a
102 instance Indexify Document where
103 indexify Document{..} =
104 Compose $ R.local (\ro -> ro{reader_section = Left head}) $
108 <*> traverse indexify body
109 instance Indexify Head where
110 indexify h@Head{..} = pure h
111 instance Indexify (Tree BodyNode) where
112 indexify (Tree n ts) =
117 <*> traverse indexify ts
118 BodySection section@Section{..} ->
119 Compose $ R.local (\ro -> ro{reader_section = Right section}) $
123 <*> traverse indexify ts
124 instance Indexify Section where
125 indexify Section{..} =
126 Section section_posXML section_attrs
127 <$> indexify section_title
128 <*> pure section_aliases
129 <*> traverse indexify section_judgments
130 instance Indexify Block where
131 indexify b = case b of
132 BlockPara p -> BlockPara <$> indexify p
133 BlockBreak{} -> pure b
136 BlockAside{..} -> BlockAside posXML attrs <$> traverse indexify blocks
137 BlockIndex{..} -> pure b
139 BlockFigure posXML type_ attrs
140 <$> traverse indexify mayTitle
141 <*> traverse indexify paras
142 BlockReferences{..} ->
143 BlockReferences posXML attrs
144 <$> traverse indexify refs
145 BlockJudges js -> BlockJudges <$> indexify js
147 BlockGrades posXML attrs
149 instance Indexify Para where
151 ParaItem{..} -> ParaItem <$> indexify item
152 ParaItems{..} -> ParaItems posXML attrs <$> traverse indexify items
153 instance Indexify ParaItem where
155 ParaPlain plain -> ParaPlain <$> indexify plain
156 ParaOL items -> ParaOL <$> traverse indexify items
157 ParaUL items -> ParaUL <$> traverse (traverse indexify) items
158 ParaQuote{..} -> ParaQuote type_ <$> traverse indexify paras
159 p@ParaArtwork{} -> pure p
160 p@ParaComment{} -> pure p
161 ParaJudgment j -> ParaJudgment <$> indexify j
162 instance Indexify ListItem where
163 indexify ListItem{..} = ListItem name <$> traverse indexify paras
164 instance Indexify Plain where
165 indexify = traverse indexify
166 instance Indexify (Tree PlainNode) where
167 indexify (Tree n ts) =
170 case pathFromWords iref_term of
171 Nothing -> Tree n <$> traverse indexify ts
173 Compose $ R.ask >>= \ro -> getCompose $
175 <$ Compose (pure (S.modify' $ \state_irefsByWords ->
176 TM.insert (<>) words (pure $ reader_section ro) $
179 <*> traverse indexify ts
181 -- TODO: introduce PlainGroup only when necessary?
182 Tree PlainGroup <$> indexifyWords (wordify txt)
183 _ -> Tree n <$> traverse indexify ts
184 instance Indexify Title where
185 indexify (Title p) = Title <$> indexify p
186 instance Indexify About where
189 <$> traverse indexify about_titles
191 <*> traverse indexify about_authors
192 <*> traverse indexify about_editor
193 <*> traverse indexify about_date
196 <*> pure about_series
197 <*> traverse indexify about_description
198 <*> pure about_headers
199 instance Indexify Entity where
200 indexify = pure -- TODO: to be coded
201 instance Indexify Date where
202 indexify = pure -- TODO: to be coded
203 instance Indexify Include where
204 indexify = pure -- TODO: to be coded
205 instance Indexify Reference where
206 indexify Reference{..} = do
211 <$> indexify reference_about
213 st@State{state_collect=All{..}, ..} <- S.get
214 let targets = HM.lookupDefault Seq.empty reference_id all_reference
215 case toList targets of
220 HM.insertWith (flip (<>)) reference_id (pure reference_locTCT) $
221 errors_reference_ambiguous state_errors
223 { state_errors = state_errors
224 { errors_reference_ambiguous = err }
226 about <- indexify reference_about
228 { reference_error = Just $ ErrorAnchor_Ambiguous $ Nat1 $ length $ err HM.!reference_id
229 , reference_about = about
232 instance Indexify Judges where
233 indexify Judges{..} =
238 <$> traverse (traverse indexify) judges_byName
239 instance Indexify [Grade] where
240 indexify = traverse indexify
241 instance Indexify Judgment where
242 indexify Judgment{..} =
244 judgment_opinionsByChoice
252 <$> traverse indexify judgment_question
253 <*> traverse indexify judgment_choices
254 instance Indexify Choice where
255 indexify Choice{..} =
256 Choice choice_locTCT choice_posXML
257 <$> traverse indexify choice_title
258 <*> traverse indexify choice_opinions
259 instance Indexify Opinion where
260 indexify Opinion{..} =
267 <$> traverse indexify opinion_comment
268 instance Indexify Grade where
275 <$> traverse indexify grade_title
276 instance Indexify Judge where
282 <$> traverse indexify judge_title
283 <*> pure judge_defaultGrades
285 indexifyWords :: Words -> Compose (R.Reader Reader) (S.State IrefsByWords) Plain
287 Compose $ R.ask >>= \case
288 ro -> pure $ go mempty ws
290 go :: Plain -> Words -> S.State IrefsByWords Plain
295 go (acc |> tree0 (PlainText " ")) next
297 goWords [] inp >>= \case
298 Nothing -> go (acc |> tree0 (PlainText w)) next
300 let iref_term = List.reverse ls
301 let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> iref_term
302 go (acc |> Tree PlainIref{iref_term} lines) ns
305 S.State IrefsByWords (Maybe (Words, Words))
306 goWords prev inp = do
307 TM.TreeMap irefsByWord <- S.get
310 curr@Space : next -> goWords (curr:prev) next
311 curr@(Word w) : next ->
312 case Map.lookup w irefsByWord of
313 Nothing -> return Nothing
314 Just nod@TM.Node{..} -> do
315 let prev' = curr:prev
316 S.put node_descendants
319 | null node_descendants -> return Nothing
320 | otherwise -> goWords prev' next
321 <* S.modify' (\rs -> TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord)
323 goWords prev' next >>= \case
327 nod{TM.node_value = Strict.Just $ reader_section ro Seq.<|irefs}
329 return $ Just (prev', next)
332 TM.TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord