]> Git — Sourcephile - doclang.git/blob - Language/DTC/Index.hs
Add indexify of Iref.
[doclang.git] / Language / DTC / Index.hs
1 {-# LANGUAGE DuplicateRecordFields #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# OPTIONS_GHC -fno-warn-orphans #-}
6 -- | Compute an Index for a DTC.
7 module Language.DTC.Index where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Category
11 import Control.Monad (Monad(..), mapM, join)
12 import Data.Bool
13 import Data.Char (Char)
14 import Data.Default.Class (Default(..))
15 import Data.Foldable (Foldable(..), concat)
16 import Data.Function (($), const)
17 import Data.Functor ((<$>))
18 import Data.Map.Strict (Map)
19 import Data.Maybe (Maybe(..), maybe)
20 import Data.Monoid (Monoid(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence ((|>))
23 import Data.Text (Text)
24 import Data.Traversable (Traversable(..))
25 import Data.TreeMap.Strict (TreeMap(..))
26 import Data.TreeSeq.Strict (Tree(..), Trees)
27 import Prelude (Num(..))
28 import Text.Show (Show(..))
29 import qualified Control.Monad.Trans.State as S
30 import qualified Data.Char as Char
31 import qualified Data.List as List
32 import qualified Data.Map.Strict as Map
33 import qualified Data.Sequence as Seq
34 import qualified Data.Strict.Maybe as Strict
35 import qualified Data.Text as Text
36 import qualified Data.TreeMap.Strict as TreeMap
37
38 import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words)
39 import Language.XML (XmlPos(..))
40 import qualified Language.DTC.Document as DTC
41
42 termsByChar :: Terms -> Map Char Terms
43 termsByChar =
44 foldr (\aliases acc ->
45 case aliases of
46 (Word w:_):_ | not (Text.null w) ->
47 Map.insertWith (<>)
48 (Char.toUpper $ Text.index w 0)
49 [aliases] acc
50 _ -> acc
51 ) Map.empty
52
53 -- * Type 'Ref'
54 data Ref
55 = Ref
56 { term :: Words
57 , count :: Count
58 , section :: XmlPos
59 } deriving (Show)
60
61 -- ** Type 'Path'
62 type Path = TreeMap.Path Word
63
64 -- ** Type 'Refs'
65 type Refs = TreeMap Word [Ref]
66
67 refsOfTerms :: Terms -> Refs
68 refsOfTerms = TreeMap.fromList const . (>>= f) . concat
69 where
70 f [] = []
71 f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws
72
73 pathFromWords :: Words -> Maybe Path
74 pathFromWords ws =
75 case ws >>= unSpace of
76 p:ps | not (Text.null p) -> Just (TreeMap.path p ps)
77 _ -> Nothing
78 where
79 unSpace = \case
80 Space -> []
81 Word w -> [w]
82
83 -- * Type 'State'
84 data State
85 = State
86 { state_refs :: Refs
87 , state_text :: Bool
88 , state_section :: XmlPos
89 }
90 state :: State
91 state = State
92 { state_refs = mempty
93 , state_text = True
94 , state_section = def
95 }
96
97 -- * Class 'Indexify'
98 class Indexify a where
99 indexify :: a -> S.State State a
100 instance Indexify (Tree k a) => Indexify [Tree k a] where
101 indexify = mapM indexify
102 instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where
103 indexify = \case
104 Tree0 v -> Tree0 <$> indexify v
105 TreeN k v ->
106 case k of
107 DTC.Section{..} -> do
108 before@State{state_section} <- S.get
109 S.put before{state_section = pos}
110 t <- TreeN <$> indexify k <*> indexify v
111 after <- S.get
112 S.put after{state_section}
113 return t
114 instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where
115 indexify = mapM indexify
116 instance Indexify DTC.BodyKey where
117 indexify = \case
118 DTC.Section{..} ->
119 DTC.Section pos attrs
120 <$> indexify title
121 <*> pure aliases
122 instance Indexify DTC.BodyValue where
123 indexify = \case
124 d@DTC.ToC{} -> pure d
125 d@DTC.ToF{} -> pure d
126 d@DTC.Index{} -> pure d
127 DTC.Figure{..} ->
128 DTC.Figure pos attrs type_
129 <$> indexify title
130 <*> indexify blocks
131 DTC.Block v ->
132 DTC.Block <$> indexify v
133 instance Indexify [DTC.Reference] where
134 indexify = mapM indexify
135 instance Indexify [DTC.Block] where
136 indexify = mapM indexify
137 instance Indexify [[DTC.Block]] where
138 indexify = mapM (mapM indexify)
139 instance Indexify DTC.Title where
140 indexify (DTC.Title t) = DTC.Title <$> indexify t
141 instance Indexify DTC.Block where
142 indexify = \case
143 DTC.Para{..} -> DTC.Para pos attrs <$> indexify para
144 DTC.OL{..} -> DTC.OL pos attrs <$> indexify items
145 DTC.UL{..} -> DTC.UL pos attrs <$> indexify items
146 DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs
147 DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art
148 d@DTC.Comment{} -> pure d
149 instance Indexify DTC.Para where
150 indexify ls = join <$> traverse indexifyLines ls
151 instance Indexify DTC.Reference where
152 indexify = return
153 instance Indexify DTC.Artwork where
154 indexify = return
155
156 indexifyLines :: DTC.Lines -> S.State State DTC.Para
157 indexifyLines = \case
158 Tree0 a -> indexifyPlain a
159 TreeN k@DTC.Iref{term} ts
160 | Just words <- pathFromWords term -> do
161 State{state_refs, state_section} <- S.get
162 case TreeMap.lookup words state_refs of
163 Strict.Nothing ->
164 Seq.singleton . TreeN k . join
165 <$> traverse indexifyLines ts
166 Strict.Just refs -> do
167 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1
168 let ref = Ref{term, count, section=state_section}
169 S.modify $ \s -> s{state_refs=
170 TreeMap.insert const words (ref:refs) state_refs}
171 Seq.singleton . TreeN DTC.Iref{DTC.term, DTC.count} . join
172 <$> traverse indexifyLines ts
173 TreeN k ts ->
174 Seq.singleton . TreeN k . join
175 <$> traverse indexifyLines ts
176
177 indexifyPlain :: DTC.LineValue -> S.State State DTC.Para
178 indexifyPlain = \case
179 DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR
180 DTC.Plain p -> do
181 State{..} <- S.get
182 let (refs,ts) = indexifyWords state_section state_refs (wordify p)
183 S.modify $ \s -> s{state_refs=refs}
184 return ts
185
186 indexifyWords :: XmlPos -> Refs -> Words -> (Refs, DTC.Para)
187 indexifyWords section = go mempty
188 where
189 go :: DTC.Para -> Refs -> Words -> (Refs, DTC.Para)
190 go acc refs inp =
191 case inp of
192 [] -> (refs, acc)
193 Space : next ->
194 go (acc |> Tree0 (DTC.Plain " ")) refs next
195 Word w : next ->
196 case goWords [] refs [] inp of
197 Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next
198 Just (Ref{term,count}, ls, ns, rs) ->
199 let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in
200 go (acc |> TreeN DTC.Iref{term, count} lines) rs ns
201 goWords ::
202 Words -> Refs ->
203 Words -> Words ->
204 Maybe (Ref, Words, Words, Refs)
205 goWords ws m@(TreeMap refsByWord) prev inp =
206 case inp of
207 [] -> Nothing
208 curr@Space : next -> goWords (curr:ws) m (curr:prev) next
209 curr@(Word w) : next ->
210 let words = curr:ws in
211 case Map.lookup w refsByWord of
212 Nothing -> Nothing
213 Just nod@TreeMap.Node{..} ->
214 case node_value of
215 Strict.Nothing
216 | null node_descendants -> Nothing
217 | otherwise ->
218 (<$> goWords words node_descendants (curr:prev) next) $ \(ref, ls, ns, rs) ->
219 (ref, ls, ns, TreeMap $
220 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
221 Strict.Just refs ->
222 case goWords words node_descendants (curr:prev) next of
223 Nothing ->
224 let term = List.reverse words in
225 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in
226 let ref = Ref{term, count, section} in
227 Just (ref, curr:prev, next, TreeMap $
228 Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord)
229 Just (ref, ls, ns, rs) ->
230 Just (ref, ls, ns, TreeMap $
231 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
232
233 wordify :: Text -> Words
234 wordify = List.reverse . go []
235 where
236 go :: Words -> Text -> Words
237 go acc t =
238 case Text.span Char.isAlphaNum t of
239 ("",_) ->
240 case Text.span Char.isSpace t of
241 ("",_) ->
242 case Text.uncons t of
243 Nothing -> acc
244 Just (c,r) -> go (Word (Text.singleton c) : acc) r
245 (_s,r) -> go (Space : acc) r
246 (w,r) -> go (Word w : acc) r
247
248 plainifyWord :: WordOrSpace -> Text
249 plainifyWord = \case
250 Word w -> w
251 Space -> " "
252
253 plainifyWords :: Words -> Text
254 plainifyWords = Text.concat . (plainifyWord <$>)