1 {-# LANGUAGE DuplicateRecordFields #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# OPTIONS_GHC -fno-warn-orphans #-}
6 -- | Compute an Index for a DTC.
7 module Language.DTC.Index where
9 import Control.Applicative (Applicative(..))
10 import Control.Category
11 import Control.Monad (Monad(..), mapM, join)
13 import Data.Char (Char)
14 import Data.Default.Class (Default(..))
15 import Data.Foldable (Foldable(..), concat)
16 import Data.Function (($), const)
17 import Data.Functor ((<$>))
18 import Data.Map.Strict (Map)
19 import Data.Maybe (Maybe(..), maybe)
20 import Data.Monoid (Monoid(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence ((|>))
23 import Data.Text (Text)
24 import Data.Traversable (Traversable(..))
25 import Data.TreeMap.Strict (TreeMap(..))
26 import Data.TreeSeq.Strict (Tree(..), Trees)
27 import Prelude (Num(..))
28 import Text.Show (Show(..))
29 import qualified Control.Monad.Trans.State as S
30 import qualified Data.Char as Char
31 import qualified Data.List as List
32 import qualified Data.Map.Strict as Map
33 import qualified Data.Sequence as Seq
34 import qualified Data.Strict.Maybe as Strict
35 import qualified Data.Text as Text
36 import qualified Data.TreeMap.Strict as TreeMap
38 import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words)
39 import Language.XML (XmlPos(..))
40 import qualified Language.DTC.Document as DTC
42 termsByChar :: Terms -> Map Char Terms
44 foldr (\aliases acc ->
46 (Word w:_):_ | not (Text.null w) ->
48 (Char.toUpper $ Text.index w 0)
62 type Path = TreeMap.Path Word
65 type Refs = TreeMap Word [Ref]
67 refsOfTerms :: Terms -> Refs
68 refsOfTerms = TreeMap.fromList const . (>>= f) . concat
71 f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws
73 pathFromWords :: Words -> Maybe Path
75 case ws >>= unSpace of
76 p:ps | not (Text.null p) -> Just (TreeMap.path p ps)
88 , state_section :: XmlPos
98 class Indexify a where
99 indexify :: a -> S.State State a
100 instance Indexify (Tree k a) => Indexify [Tree k a] where
101 indexify = mapM indexify
102 instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where
104 Tree0 v -> Tree0 <$> indexify v
107 DTC.Section{..} -> do
108 before@State{state_section} <- S.get
109 S.put before{state_section = pos}
110 t <- TreeN <$> indexify k <*> indexify v
112 S.put after{state_section}
114 instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where
115 indexify = mapM indexify
116 instance Indexify DTC.BodyKey where
119 DTC.Section pos attrs
122 instance Indexify DTC.BodyValue where
124 d@DTC.ToC{} -> pure d
125 d@DTC.ToF{} -> pure d
126 d@DTC.Index{} -> pure d
128 DTC.Figure pos attrs type_
132 DTC.Block <$> indexify v
133 instance Indexify [DTC.Reference] where
134 indexify = mapM indexify
135 instance Indexify [DTC.Block] where
136 indexify = mapM indexify
137 instance Indexify [[DTC.Block]] where
138 indexify = mapM (mapM indexify)
139 instance Indexify DTC.Title where
140 indexify (DTC.Title t) = DTC.Title <$> indexify t
141 instance Indexify DTC.Block where
143 DTC.Para{..} -> DTC.Para pos attrs <$> indexify para
144 DTC.OL{..} -> DTC.OL pos attrs <$> indexify items
145 DTC.UL{..} -> DTC.UL pos attrs <$> indexify items
146 DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs
147 DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art
148 d@DTC.Comment{} -> pure d
149 instance Indexify DTC.Para where
150 indexify ls = join <$> traverse indexifyLines ls
151 instance Indexify DTC.Reference where
153 instance Indexify DTC.Artwork where
156 indexifyLines :: DTC.Lines -> S.State State DTC.Para
157 indexifyLines = \case
158 Tree0 a -> indexifyPlain a
159 TreeN k@DTC.Iref{term} ts
160 | Just words <- pathFromWords term -> do
161 State{state_refs, state_section} <- S.get
162 case TreeMap.lookup words state_refs of
164 Seq.singleton . TreeN k . join
165 <$> traverse indexifyLines ts
166 Strict.Just refs -> do
167 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1
168 let ref = Ref{term, count, section=state_section}
169 S.modify $ \s -> s{state_refs=
170 TreeMap.insert const words (ref:refs) state_refs}
171 Seq.singleton . TreeN DTC.Iref{DTC.term, DTC.count} . join
172 <$> traverse indexifyLines ts
174 Seq.singleton . TreeN k . join
175 <$> traverse indexifyLines ts
177 indexifyPlain :: DTC.LineValue -> S.State State DTC.Para
178 indexifyPlain = \case
179 DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR
182 let (refs,ts) = indexifyWords state_section state_refs (wordify p)
183 S.modify $ \s -> s{state_refs=refs}
186 indexifyWords :: XmlPos -> Refs -> Words -> (Refs, DTC.Para)
187 indexifyWords section = go mempty
189 go :: DTC.Para -> Refs -> Words -> (Refs, DTC.Para)
194 go (acc |> Tree0 (DTC.Plain " ")) refs next
196 case goWords [] refs [] inp of
197 Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next
198 Just (Ref{term,count}, ls, ns, rs) ->
199 let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in
200 go (acc |> TreeN DTC.Iref{term, count} lines) rs ns
204 Maybe (Ref, Words, Words, Refs)
205 goWords ws m@(TreeMap refsByWord) prev inp =
208 curr@Space : next -> goWords (curr:ws) m (curr:prev) next
209 curr@(Word w) : next ->
210 let words = curr:ws in
211 case Map.lookup w refsByWord of
213 Just nod@TreeMap.Node{..} ->
216 | null node_descendants -> Nothing
218 (<$> goWords words node_descendants (curr:prev) next) $ \(ref, ls, ns, rs) ->
219 (ref, ls, ns, TreeMap $
220 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
222 case goWords words node_descendants (curr:prev) next of
224 let term = List.reverse words in
225 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in
226 let ref = Ref{term, count, section} in
227 Just (ref, curr:prev, next, TreeMap $
228 Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord)
229 Just (ref, ls, ns, rs) ->
230 Just (ref, ls, ns, TreeMap $
231 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
233 wordify :: Text -> Words
234 wordify = List.reverse . go []
236 go :: Words -> Text -> Words
238 case Text.span Char.isAlphaNum t of
240 case Text.span Char.isSpace t of
242 case Text.uncons t of
244 Just (c,r) -> go (Word (Text.singleton c) : acc) r
245 (_s,r) -> go (Space : acc) r
246 (w,r) -> go (Word w : acc) r
248 plainifyWord :: WordOrSpace -> Text
253 plainifyWords :: Words -> Text
254 plainifyWords = Text.concat . (plainifyWord <$>)