1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 module Hdoc.DTC.Analyze.Index where
5 -- import Data.Eq (Eq(..))
6 -- import Text.Show (Show(..))
7 -- import qualified Control.Monad.Trans.Writer as W
8 -- import qualified Data.HashMap.Strict as HM
9 -- import qualified Data.Tree as Tree
10 -- import qualified Data.TreeSeq.Strict as TS
11 -- import qualified Hjugement as MJ
12 import Control.Applicative (Applicative(..))
13 import Control.Monad (Monad(..))
15 import Data.Char (Char)
16 import Data.Default.Class (Default(..))
17 import Data.Either (Either(..))
18 import Data.Foldable (Foldable(..), concat)
19 import Data.Function (($), (.), const)
20 import Data.Functor ((<$>), (<$))
21 import Data.Functor.Compose (Compose(..))
22 import Data.Map.Strict (Map)
23 import Data.Maybe (Maybe(..), maybe)
24 import Data.Monoid (Monoid(..))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence (Seq, (|>))
27 import Data.Traversable (Traversable(..))
28 import Data.TreeMap.Strict (TreeMap(..))
29 import Data.TreeSeq.Strict (Tree(..), tree0)
30 import qualified Control.Monad.Trans.Reader as R
31 import qualified Control.Monad.Trans.State as S
32 import qualified Data.Char as Char
33 import qualified Data.List as List
34 import qualified Data.Map.Strict as Map
35 import qualified Data.Sequence as Seq
36 import qualified Data.Strict.Maybe as Strict
37 import qualified Data.Text.Lazy as TL
38 import qualified Data.TreeMap.Strict as TM
40 import Hdoc.DTC.Document as DTC
41 -- import Hdoc.TCT.Cell as TCT
42 -- import qualified Hdoc.XML as XML
44 -- * Type 'IrefsByWords'
45 type IrefsByWords = TM.TreeMap Word (Seq (Either Head Section))
47 irefsOfTerms :: Terms -> IrefsByWords
48 irefsOfTerms = TM.fromList const . (>>= f) . concat
51 f ws = maybe [] (\p -> [(p,Seq.empty)]) $ pathFromWords ws
53 wordify :: TL.Text -> Words
54 wordify = List.reverse . go []
56 go :: Words -> TL.Text -> Words
58 case TL.span Char.isAlphaNum t of
60 case TL.span Char.isSpace t of
64 Just (c,r) -> go (Word (TL.singleton c) : acc) r
65 (_s,r) -> go (Space : acc) r
66 (w,r) -> go (Word w : acc) r
68 plainifyWord :: WordOrSpace -> TL.Text
73 plainifyWords :: Words -> TL.Text
74 plainifyWords = TL.concat . (plainifyWord <$>)
76 termsByChar :: Terms -> Map Char Terms
78 foldr (\aliases acc ->
80 (Word w:_):_ | not (TL.null w) ->
82 (Char.toUpper $ TL.index w 0)
89 { reader_section :: Either Head Section
91 instance Default Reader where
93 { reader_section = Left def
97 type State = IrefsByWords
100 class Indexify a where
101 indexify :: a -> Compose (R.Reader Reader) (S.State State) a
102 instance Indexify Document where
103 indexify Document{..} =
104 Compose $ R.local (\ro -> ro{reader_section = Left head}) $
108 <*> traverse indexify body
109 instance Indexify Head where
110 indexify h@Head{..} = pure h
111 instance Indexify (Tree BodyNode) where
112 indexify (Tree n ts) =
117 <*> traverse indexify ts
118 BodySection section@Section{..} ->
119 Compose $ R.local (\ro -> ro{reader_section = Right section}) $
123 <*> traverse indexify ts
124 instance Indexify Section where
125 indexify Section{..} =
126 Section section_posXML section_attrs
127 <$> indexify section_title
128 <*> pure section_aliases
129 <*> traverse indexify section_judgments
130 instance Indexify Block where
131 indexify b = case b of
132 BlockPara p -> BlockPara <$> indexify p
133 BlockBreak{} -> pure b
136 BlockAside{..} -> BlockAside posXML attrs <$> traverse indexify blocks
137 BlockIndex{..} -> pure b
139 BlockFigure posXML type_ attrs
140 <$> traverse indexify mayTitle
141 <*> traverse indexify paras
142 BlockReferences{..} ->
143 BlockReferences posXML attrs
144 <$> traverse indexify refs
145 BlockJudges js -> BlockJudges <$> indexify js
147 BlockGrades posXML attrs
149 instance Indexify Para where
151 ParaItem{..} -> ParaItem <$> indexify item
152 ParaItems{..} -> ParaItems posXML attrs <$> traverse indexify items
153 instance Indexify ParaItem where
155 ParaPlain plain -> ParaPlain <$> indexify plain
156 ParaOL items -> ParaOL <$> traverse indexify items
157 ParaUL items -> ParaUL <$> traverse (traverse indexify) items
158 ParaQuote{..} -> ParaQuote type_ <$> traverse indexify paras
159 p@ParaArtwork{} -> pure p
160 p@ParaComment{} -> pure p
161 ParaJudgment j -> ParaJudgment <$> indexify j
162 instance Indexify ListItem where
163 indexify ListItem{..} = ListItem name <$> traverse indexify paras
164 instance Indexify Plain where
165 indexify = traverse indexify
166 instance Indexify (Tree PlainNode) where
167 indexify (Tree n ts) =
170 case pathFromWords iref_term of
171 Nothing -> Tree n <$> traverse indexify ts
173 Compose $ R.ask >>= \ro -> getCompose $
175 <$ Compose (pure (S.modify' $ \state_irefsByWords ->
176 TM.insert (<>) words (pure $ reader_section ro) $
179 <*> traverse indexify ts
181 -- TODO: introduce PlainGroup only when necessary?
182 Tree PlainGroup <$> indexifyWords (wordify txt)
183 _ -> Tree n <$> traverse indexify ts
184 instance Indexify Title where
185 indexify (Title p) = Title <$> indexify p
186 instance Indexify About where
189 <$> traverse indexify about_titles
191 <*> traverse indexify about_authors
192 <*> traverse indexify about_editor
193 <*> traverse indexify about_date
196 <*> pure about_series
197 <*> traverse indexify about_includes
198 instance Indexify Entity where
199 indexify = pure -- TODO: to be coded
200 instance Indexify Date where
201 indexify = pure -- TODO: to be coded
202 instance Indexify Include where
203 indexify = pure -- TODO: to be coded
204 instance Indexify Reference where
205 indexify Reference{..} = do
210 <$> indexify reference_about
212 st@State{state_collect=All{..}, ..} <- S.get
213 let targets = HM.lookupDefault Seq.empty reference_id all_reference
214 case toList targets of
219 HM.insertWith (flip (<>)) reference_id (pure reference_locTCT) $
220 errors_reference_ambiguous state_errors
222 { state_errors = state_errors
223 { errors_reference_ambiguous = err }
225 about <- indexify reference_about
227 { reference_error = Just $ ErrorAnchor_Ambiguous $ Nat1 $ length $ err HM.!reference_id
228 , reference_about = about
231 instance Indexify Judges where
232 indexify Judges{..} =
237 <$> traverse (traverse indexify) judges_byName
238 instance Indexify [Grade] where
239 indexify = traverse indexify
240 instance Indexify Judgment where
241 indexify Judgment{..} =
243 judgment_opinionsByChoice
251 <$> traverse indexify judgment_question
252 <*> traverse indexify judgment_choices
253 instance Indexify Choice where
254 indexify Choice{..} =
255 Choice choice_locTCT choice_posXML
256 <$> traverse indexify choice_title
257 <*> traverse indexify choice_opinions
258 instance Indexify Opinion where
259 indexify Opinion{..} =
266 <$> traverse indexify opinion_comment
267 instance Indexify Grade where
274 <$> traverse indexify grade_title
275 instance Indexify Judge where
281 <$> traverse indexify judge_title
282 <*> pure judge_defaultGrades
284 indexifyWords :: Words -> Compose (R.Reader Reader) (S.State IrefsByWords) Plain
286 Compose $ R.ask >>= \case
287 ro -> pure $ go mempty ws
289 go :: Plain -> Words -> S.State IrefsByWords Plain
294 go (acc |> tree0 (PlainText " ")) next
296 goWords [] inp >>= \case
297 Nothing -> go (acc |> tree0 (PlainText w)) next
299 let iref_term = List.reverse ls
300 let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> iref_term
301 go (acc |> Tree PlainIref{iref_term} lines) ns
304 S.State IrefsByWords (Maybe (Words, Words))
305 goWords prev inp = do
306 TM.TreeMap irefsByWord <- S.get
309 curr@Space : next -> goWords (curr:prev) next
310 curr@(Word w) : next ->
311 case Map.lookup w irefsByWord of
312 Nothing -> return Nothing
313 Just nod@TM.Node{..} -> do
314 let prev' = curr:prev
315 S.put node_descendants
318 | null node_descendants -> return Nothing
319 | otherwise -> goWords prev' next
320 <* S.modify' (\rs -> TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord)
322 goWords prev' next >>= \case
326 nod{TM.node_value = Strict.Just $ reader_section ro Seq.<|irefs}
328 return $ Just (prev', next)
331 TM.TreeMap $ Map.insert w nod{TM.node_descendants = rs} irefsByWord