]> Git — Sourcephile - doclang.git/blob - Language/DTC/Index.hs
Use TreeSeq to make DTC.Line.
[doclang.git] / Language / DTC / Index.hs
1 {-# LANGUAGE DuplicateRecordFields #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE MultiParamTypeClasses #-}
5 {-# LANGUAGE OverloadedStrings #-}
6 {-# LANGUAGE ScopedTypeVariables #-}
7 {-# LANGUAGE TypeApplications #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 -- | Compute an Index for a DTC.
11 module Language.DTC.Index where
12
13 import Control.Applicative (Applicative(..))
14 import Control.Category
15 import Control.Monad (Monad(..), mapM)
16 import Data.Bool
17 import Data.Char (Char)
18 import Data.Default.Class (Default(..))
19 import Data.Foldable (Foldable(..), concat)
20 import Data.Function (($), const)
21 import Data.Functor ((<$>))
22 import Data.Map.Strict (Map)
23 import Data.Maybe (Maybe(..), maybe)
24 import Data.Monoid (Monoid(..))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence ((|>))
27 import Data.Text (Text)
28 import Data.Traversable (Traversable(..))
29 import Data.TreeMap.Strict (TreeMap(..))
30 import Data.TreeSeq.Strict (Tree(..), Trees)
31 import Prelude (Num(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Sequence as Seq
38 import qualified Data.Strict.Maybe as Strict
39 import qualified Data.Text as Text
40 import qualified Data.TreeMap.Strict as TreeMap
41 import qualified Data.TreeSeq.Strict as Tree
42
43 import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words)
44 import Language.XML (XmlPos(..))
45 import qualified Language.DTC.Document as DTC
46
47 -- import Debug.Trace (trace)
48
49 termsByChar :: Terms -> Map Char Terms
50 termsByChar =
51 foldr (\aliases acc ->
52 case aliases of
53 (Word w:_):_ | not (Text.null w) ->
54 Map.insertWith (<>)
55 (Char.toUpper $ Text.index w 0)
56 [aliases] acc
57 _ -> acc
58 ) Map.empty
59
60 -- * Type 'Ref'
61 data Ref
62 = Ref
63 { term :: Words
64 , count :: Count
65 , section :: XmlPos
66 } deriving (Show)
67
68 -- ** Type 'Path'
69 type Path = TreeMap.Path Word
70
71 -- ** Type 'Refs'
72 type Refs = TreeMap Word [Ref]
73
74 refsOfTerms :: Terms -> Refs
75 refsOfTerms = TreeMap.fromList const . (>>= f) . concat
76 where
77 f [] = []
78 f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws
79
80 pathFromWords :: Words -> Maybe Path
81 pathFromWords ws =
82 case ws >>= unSpace of
83 p:ps | not (Text.null p) -> Just (TreeMap.path p ps)
84 _ -> Nothing
85 where
86 unSpace = \case
87 Space -> []
88 Word w -> [w]
89
90 -- * Type 'State'
91 data State
92 = State
93 { state_refs :: Refs
94 , state_text :: Bool
95 , state_section :: XmlPos
96 }
97 state :: State
98 state = State
99 { state_refs = mempty
100 , state_text = True
101 , state_section = def
102 }
103
104 -- * Class 'Indexify'
105 class Indexify a where
106 indexify :: a -> S.State State a
107 instance Indexify (Tree k a) => Indexify [Tree k a] where
108 indexify = mapM indexify
109 instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where
110 indexify = \case
111 Tree0 v -> Tree0 <$> indexify v
112 TreeN k v ->
113 case k of
114 DTC.Section{..} -> do
115 before@State{state_section} <- S.get
116 S.put before{state_section = pos}
117 t <- TreeN <$> indexify k <*> indexify v
118 after <- S.get
119 S.put after{state_section}
120 return t
121 instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where
122 indexify = mapM indexify
123 {-
124 instance Indexify a => Indexify (Seq a) where
125 indexify = mapM indexify
126 -}
127 instance Indexify DTC.BodyKey where
128 indexify = \case
129 DTC.Section{..} ->
130 DTC.Section pos attrs
131 <$> indexify title
132 <*> pure aliases
133 instance Indexify DTC.BodyValue where
134 indexify = \case
135 d@DTC.ToC{} -> pure d
136 d@DTC.ToF{} -> pure d
137 d@DTC.Index{} -> pure d
138 DTC.Figure{..} ->
139 DTC.Figure pos attrs type_
140 <$> indexify title
141 <*> indexify blocks
142 DTC.Block v ->
143 DTC.Block <$> indexify v
144 instance Indexify [DTC.Reference] where
145 indexify = mapM indexify
146 instance Indexify [DTC.Block] where
147 indexify = mapM indexify
148 instance Indexify [[DTC.Block]] where
149 indexify = mapM (mapM indexify)
150 instance Indexify DTC.Title where
151 indexify (DTC.Title t) = DTC.Title <$> indexify t
152 instance Indexify DTC.Block where
153 indexify = \case
154 DTC.Para{..} -> DTC.Para pos attrs <$> indexify lines
155 DTC.OL{..} -> DTC.OL pos attrs <$> indexify items
156 DTC.UL{..} -> DTC.UL pos attrs <$> indexify items
157 DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs
158 DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art
159 d@DTC.Comment{} -> pure d
160
161 instance Indexify DTC.Lines where
162 indexify ls =
163 Tree.joinTrees <$> traverse (traverse go) ls
164 where
165 go = \case
166 DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR
167 DTC.Plain p -> do
168 State{..} <- S.get
169 let (refs,ret) = indexifyWords state_section state_refs (wordify p)
170 S.modify $ \s -> s{state_refs=refs}
171 return ret
172 instance Indexify DTC.Reference where
173 indexify = return
174 instance Indexify DTC.Artwork where
175 indexify = return
176
177 wordify :: Text -> Words
178 wordify = List.reverse . go []
179 where
180 go :: Words -> Text -> Words
181 go acc t =
182 case Text.span Char.isAlphaNum t of
183 ("",_) ->
184 case Text.span Char.isSpace t of
185 ("",_) ->
186 case Text.uncons t of
187 Nothing -> acc
188 Just (c,r) -> go (Word (Text.singleton c) : acc) r
189 (_s,r) -> go (Space : acc) r
190 (w,r) -> go (Word w : acc) r
191
192 plainifyWord :: WordOrSpace -> Text
193 plainifyWord = \case
194 Word w -> w
195 Space -> " "
196
197 plainifyWords :: Words -> Text
198 plainifyWords = Text.concat . (plainifyWord <$>)
199
200 indexifyWords :: XmlPos -> Refs -> Words -> (Refs, DTC.Lines)
201 indexifyWords section = go mempty
202 where
203 go :: DTC.Lines -> Refs -> Words -> (Refs, DTC.Lines)
204 go acc refs inp =
205 case inp of
206 [] -> (refs, acc)
207 Space : next ->
208 go (acc |> Tree0 (DTC.Plain " ")) refs next
209 Word w : next ->
210 case goWords [] refs [] inp of
211 Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next
212 Just (Ref{term,count}, ls, ns, rs) ->
213 let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in
214 go (acc |> TreeN DTC.Iref{term, count} lines) rs ns
215 goWords ::
216 Words -> Refs ->
217 Words -> Words ->
218 Maybe (Ref, Words, Words, Refs)
219 goWords ws m@(TreeMap refsByWord) prev inp =
220 case inp of
221 [] -> Nothing
222 curr@Space : next -> goWords (curr:ws) m (curr:prev) next
223 curr@(Word w) : next ->
224 let words = curr:ws in
225 case Map.lookup w refsByWord of
226 Nothing -> Nothing
227 Just nod@TreeMap.Node{..} ->
228 case node_value of
229 Strict.Nothing ->
230 if null node_descendants
231 then Nothing
232 else case goWords words node_descendants (curr:prev) next of
233 Nothing -> Nothing
234 Just (ref, ls, ns, rs) ->
235 Just (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
236 Strict.Just refs ->
237 case goWords words node_descendants (curr:prev) next of
238 Nothing ->
239 let term = List.reverse words in
240 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in
241 let ref = Ref{term, count, section} in
242 Just (ref, curr:prev, next, TreeMap $ Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord)
243 Just (ref, ls, ns, rs) ->
244 Just (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)