1 {-# LANGUAGE DuplicateRecordFields #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# OPTIONS_GHC -fno-warn-orphans #-}
6 -- | Compute an Index for a DTC.
7 module Language.DTC.Index where
9 import Control.Applicative (Applicative(..))
10 import Control.Category
11 import Control.Monad (Monad(..), mapM, join)
13 import Data.Char (Char)
14 import Data.Default.Class (Default(..))
15 import Data.Foldable (Foldable(..), concat)
16 import Data.Function (($), const)
17 import Data.Functor ((<$>))
18 import Data.Map.Strict (Map)
19 import Data.Maybe (Maybe(..), maybe)
20 import Data.Monoid (Monoid(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence ((|>))
23 import Data.Text (Text)
24 import Data.Traversable (Traversable(..))
25 import Data.TreeMap.Strict (TreeMap(..))
26 import Data.TreeSeq.Strict (Tree(..), Trees)
27 import Prelude (Num(..))
28 import Text.Show (Show(..))
29 import qualified Control.Monad.Trans.State as S
30 import qualified Data.Char as Char
31 import qualified Data.List as List
32 import qualified Data.Map.Strict as Map
33 import qualified Data.Sequence as Seq
34 import qualified Data.Strict.Maybe as Strict
35 import qualified Data.Text as Text
36 import qualified Data.TreeMap.Strict as TreeMap
38 import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words, Pos)
39 import qualified Language.DTC.Document as DTC
41 termsByChar :: Terms -> Map Char Terms
43 foldr (\aliases acc ->
45 (Word w:_):_ | not (Text.null w) ->
47 (Char.toUpper $ Text.index w 0)
61 type Path = TreeMap.Path Word
64 type Refs = TreeMap Word [Ref]
66 refsOfTerms :: Terms -> Refs
67 refsOfTerms = TreeMap.fromList const . (>>= f) . concat
70 f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws
72 pathFromWords :: Words -> Maybe Path
74 case ws >>= unSpace of
75 p:ps | not (Text.null p) -> Just (TreeMap.path p ps)
87 , state_section :: Pos
97 class Indexify a where
98 indexify :: a -> S.State State a
99 instance Indexify (Tree k a) => Indexify [Tree k a] where
100 indexify = mapM indexify
101 instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where
103 Tree0 v -> Tree0 <$> indexify v
106 DTC.Section{..} -> do
107 before@State{state_section} <- S.get
108 S.put before{state_section = pos}
109 t <- TreeN <$> indexify k <*> indexify v
111 S.put after{state_section}
113 instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where
114 indexify = mapM indexify
115 instance Indexify DTC.BodyKey where
118 DTC.Section pos attrs
121 instance Indexify DTC.BodyValue where
123 d@DTC.ToC{} -> pure d
124 d@DTC.ToF{} -> pure d
125 d@DTC.Index{} -> pure d
127 DTC.Figure pos attrs type_
131 DTC.Block <$> indexify v
132 instance Indexify [DTC.Reference] where
133 indexify = mapM indexify
134 instance Indexify [DTC.Block] where
135 indexify = mapM indexify
136 instance Indexify [[DTC.Block]] where
137 indexify = mapM (mapM indexify)
138 instance Indexify DTC.Title where
139 indexify (DTC.Title t) = DTC.Title <$> indexify t
140 instance Indexify DTC.Block where
142 DTC.Para{..} -> DTC.Para pos attrs <$> indexify para
143 DTC.OL{..} -> DTC.OL pos attrs <$> indexify items
144 DTC.UL{..} -> DTC.UL pos attrs <$> indexify items
145 DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs
146 DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art
147 d@DTC.Comment{} -> pure d
148 instance Indexify DTC.Para where
149 indexify ls = join <$> traverse indexifyLines ls
150 instance Indexify DTC.Reference where
152 instance Indexify DTC.Artwork where
155 indexifyLines :: DTC.Lines -> S.State State DTC.Para
156 indexifyLines = \case
157 Tree0 a -> indexifyPlain a
158 TreeN k@DTC.Iref{term} ts
159 | Just words <- pathFromWords term -> do
160 State{state_refs, state_section} <- S.get
161 case TreeMap.lookup words state_refs of
163 Seq.singleton . TreeN k . join
164 <$> traverse indexifyLines ts
165 Strict.Just refs -> do
166 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1
167 let ref = Ref{term, count, section=state_section}
168 S.modify $ \s -> s{state_refs=
169 TreeMap.insert const words (ref:refs) state_refs}
170 Seq.singleton . TreeN DTC.Iref{DTC.term, DTC.count} . join
171 <$> traverse indexifyLines ts
173 Seq.singleton . TreeN k . join
174 <$> traverse indexifyLines ts
176 indexifyPlain :: DTC.LineValue -> S.State State DTC.Para
177 indexifyPlain = \case
178 DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR
181 let (refs,ts) = indexifyWords state_section state_refs (wordify p)
182 S.modify $ \s -> s{state_refs=refs}
185 indexifyWords :: Pos -> Refs -> Words -> (Refs, DTC.Para)
186 indexifyWords section = go mempty
188 go :: DTC.Para -> Refs -> Words -> (Refs, DTC.Para)
193 go (acc |> Tree0 (DTC.Plain " ")) refs next
195 case goWords [] refs [] inp of
196 Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next
197 Just (Ref{term,count}, ls, ns, rs) ->
198 let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in
199 go (acc |> TreeN DTC.Iref{term, count} lines) rs ns
203 Maybe (Ref, Words, Words, Refs)
204 goWords ws m@(TreeMap refsByWord) prev inp =
207 curr@Space : next -> goWords (curr:ws) m (curr:prev) next
208 curr@(Word w) : next ->
209 let words = curr:ws in
210 case Map.lookup w refsByWord of
212 Just nod@TreeMap.Node{..} ->
215 | null node_descendants -> Nothing
217 (<$> goWords words node_descendants (curr:prev) next) $ \(ref, ls, ns, rs) ->
218 (ref, ls, ns, TreeMap $
219 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
221 case goWords words node_descendants (curr:prev) next of
223 let term = List.reverse words in
224 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in
225 let ref = Ref{term, count, section} in
226 Just (ref, curr:prev, next, TreeMap $
227 Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord)
228 Just (ref, ls, ns, rs) ->
229 Just (ref, ls, ns, TreeMap $
230 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
232 wordify :: Text -> Words
233 wordify = List.reverse . go []
235 go :: Words -> Text -> Words
237 case Text.span Char.isAlphaNum t of
239 case Text.span Char.isSpace t of
241 case Text.uncons t of
243 Just (c,r) -> go (Word (Text.singleton c) : acc) r
244 (_s,r) -> go (Space : acc) r
245 (w,r) -> go (Word w : acc) r
247 plainifyWord :: WordOrSpace -> Text
252 plainifyWords :: Words -> Text
253 plainifyWords = Text.concat . (plainifyWord <$>)