]> Git — Sourcephile - doclang.git/blob - Language/DTC/Index.hs
Fix ToF ordering.
[doclang.git] / Language / DTC / Index.hs
1 {-# LANGUAGE DuplicateRecordFields #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# OPTIONS_GHC -fno-warn-orphans #-}
6 -- | Compute an Index for a DTC.
7 module Language.DTC.Index where
8
9 import Control.Applicative (Applicative(..))
10 import Control.Category
11 import Control.Monad (Monad(..), mapM, join)
12 import Data.Bool
13 import Data.Char (Char)
14 import Data.Default.Class (Default(..))
15 import Data.Foldable (Foldable(..), concat)
16 import Data.Function (($), const)
17 import Data.Functor ((<$>))
18 import Data.Map.Strict (Map)
19 import Data.Maybe (Maybe(..), maybe)
20 import Data.Monoid (Monoid(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence ((|>))
23 import Data.Text (Text)
24 import Data.Traversable (Traversable(..))
25 import Data.TreeMap.Strict (TreeMap(..))
26 import Data.TreeSeq.Strict (Tree(..), Trees)
27 import Prelude (Num(..))
28 import Text.Show (Show(..))
29 import qualified Control.Monad.Trans.State as S
30 import qualified Data.Char as Char
31 import qualified Data.List as List
32 import qualified Data.Map.Strict as Map
33 import qualified Data.Sequence as Seq
34 import qualified Data.Strict.Maybe as Strict
35 import qualified Data.Text as Text
36 import qualified Data.TreeMap.Strict as TreeMap
37
38 import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words, Pos)
39 import qualified Language.DTC.Document as DTC
40
41 termsByChar :: Terms -> Map Char Terms
42 termsByChar =
43 foldr (\aliases acc ->
44 case aliases of
45 (Word w:_):_ | not (Text.null w) ->
46 Map.insertWith (<>)
47 (Char.toUpper $ Text.index w 0)
48 [aliases] acc
49 _ -> acc
50 ) Map.empty
51
52 -- * Type 'Ref'
53 data Ref
54 = Ref
55 { term :: Words
56 , count :: Count
57 , section :: Pos
58 } deriving (Show)
59
60 -- ** Type 'Path'
61 type Path = TreeMap.Path Word
62
63 -- ** Type 'Refs'
64 type Refs = TreeMap Word [Ref]
65
66 refsOfTerms :: Terms -> Refs
67 refsOfTerms = TreeMap.fromList const . (>>= f) . concat
68 where
69 f [] = []
70 f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws
71
72 pathFromWords :: Words -> Maybe Path
73 pathFromWords ws =
74 case ws >>= unSpace of
75 p:ps | not (Text.null p) -> Just (TreeMap.path p ps)
76 _ -> Nothing
77 where
78 unSpace = \case
79 Space -> []
80 Word w -> [w]
81
82 -- * Type 'State'
83 data State
84 = State
85 { state_refs :: Refs
86 , state_text :: Bool
87 , state_section :: Pos
88 }
89 state :: State
90 state = State
91 { state_refs = mempty
92 , state_text = True
93 , state_section = def
94 }
95
96 -- * Class 'Indexify'
97 class Indexify a where
98 indexify :: a -> S.State State a
99 instance Indexify (Tree k a) => Indexify [Tree k a] where
100 indexify = mapM indexify
101 instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where
102 indexify = \case
103 Tree0 v -> Tree0 <$> indexify v
104 TreeN k v ->
105 case k of
106 DTC.Section{..} -> do
107 before@State{state_section} <- S.get
108 S.put before{state_section = pos}
109 t <- TreeN <$> indexify k <*> indexify v
110 after <- S.get
111 S.put after{state_section}
112 return t
113 instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where
114 indexify = mapM indexify
115 instance Indexify DTC.BodyKey where
116 indexify = \case
117 DTC.Section{..} ->
118 DTC.Section pos attrs
119 <$> indexify title
120 <*> pure aliases
121 instance Indexify DTC.BodyValue where
122 indexify = \case
123 d@DTC.ToC{} -> pure d
124 d@DTC.ToF{} -> pure d
125 d@DTC.Index{} -> pure d
126 DTC.Figure{..} ->
127 DTC.Figure pos attrs type_
128 <$> indexify title
129 <*> indexify blocks
130 DTC.Block v ->
131 DTC.Block <$> indexify v
132 instance Indexify [DTC.Reference] where
133 indexify = mapM indexify
134 instance Indexify [DTC.Block] where
135 indexify = mapM indexify
136 instance Indexify [[DTC.Block]] where
137 indexify = mapM (mapM indexify)
138 instance Indexify DTC.Title where
139 indexify (DTC.Title t) = DTC.Title <$> indexify t
140 instance Indexify DTC.Block where
141 indexify = \case
142 DTC.Para{..} -> DTC.Para pos attrs <$> indexify para
143 DTC.OL{..} -> DTC.OL pos attrs <$> indexify items
144 DTC.UL{..} -> DTC.UL pos attrs <$> indexify items
145 DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs
146 DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art
147 d@DTC.Comment{} -> pure d
148 instance Indexify DTC.Para where
149 indexify ls = join <$> traverse indexifyLines ls
150 instance Indexify DTC.Reference where
151 indexify = return
152 instance Indexify DTC.Artwork where
153 indexify = return
154
155 indexifyLines :: DTC.Lines -> S.State State DTC.Para
156 indexifyLines = \case
157 Tree0 a -> indexifyPlain a
158 TreeN k@DTC.Iref{term} ts
159 | Just words <- pathFromWords term -> do
160 State{state_refs, state_section} <- S.get
161 case TreeMap.lookup words state_refs of
162 Strict.Nothing ->
163 Seq.singleton . TreeN k . join
164 <$> traverse indexifyLines ts
165 Strict.Just refs -> do
166 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1
167 let ref = Ref{term, count, section=state_section}
168 S.modify $ \s -> s{state_refs=
169 TreeMap.insert const words (ref:refs) state_refs}
170 Seq.singleton . TreeN DTC.Iref{DTC.term, DTC.count} . join
171 <$> traverse indexifyLines ts
172 TreeN k ts ->
173 Seq.singleton . TreeN k . join
174 <$> traverse indexifyLines ts
175
176 indexifyPlain :: DTC.LineValue -> S.State State DTC.Para
177 indexifyPlain = \case
178 DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR
179 DTC.Plain p -> do
180 State{..} <- S.get
181 let (refs,ts) = indexifyWords state_section state_refs (wordify p)
182 S.modify $ \s -> s{state_refs=refs}
183 return ts
184
185 indexifyWords :: Pos -> Refs -> Words -> (Refs, DTC.Para)
186 indexifyWords section = go mempty
187 where
188 go :: DTC.Para -> Refs -> Words -> (Refs, DTC.Para)
189 go acc refs inp =
190 case inp of
191 [] -> (refs, acc)
192 Space : next ->
193 go (acc |> Tree0 (DTC.Plain " ")) refs next
194 Word w : next ->
195 case goWords [] refs [] inp of
196 Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next
197 Just (Ref{term,count}, ls, ns, rs) ->
198 let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in
199 go (acc |> TreeN DTC.Iref{term, count} lines) rs ns
200 goWords ::
201 Words -> Refs ->
202 Words -> Words ->
203 Maybe (Ref, Words, Words, Refs)
204 goWords ws m@(TreeMap refsByWord) prev inp =
205 case inp of
206 [] -> Nothing
207 curr@Space : next -> goWords (curr:ws) m (curr:prev) next
208 curr@(Word w) : next ->
209 let words = curr:ws in
210 case Map.lookup w refsByWord of
211 Nothing -> Nothing
212 Just nod@TreeMap.Node{..} ->
213 case node_value of
214 Strict.Nothing
215 | null node_descendants -> Nothing
216 | otherwise ->
217 (<$> goWords words node_descendants (curr:prev) next) $ \(ref, ls, ns, rs) ->
218 (ref, ls, ns, TreeMap $
219 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
220 Strict.Just refs ->
221 case goWords words node_descendants (curr:prev) next of
222 Nothing ->
223 let term = List.reverse words in
224 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in
225 let ref = Ref{term, count, section} in
226 Just (ref, curr:prev, next, TreeMap $
227 Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord)
228 Just (ref, ls, ns, rs) ->
229 Just (ref, ls, ns, TreeMap $
230 Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
231
232 wordify :: Text -> Words
233 wordify = List.reverse . go []
234 where
235 go :: Words -> Text -> Words
236 go acc t =
237 case Text.span Char.isAlphaNum t of
238 ("",_) ->
239 case Text.span Char.isSpace t of
240 ("",_) ->
241 case Text.uncons t of
242 Nothing -> acc
243 Just (c,r) -> go (Word (Text.singleton c) : acc) r
244 (_s,r) -> go (Space : acc) r
245 (w,r) -> go (Word w : acc) r
246
247 plainifyWord :: WordOrSpace -> Text
248 plainifyWord = \case
249 Word w -> w
250 Space -> " "
251
252 plainifyWords :: Words -> Text
253 plainifyWords = Text.concat . (plainifyWord <$>)