1 {-# LANGUAGE DuplicateRecordFields #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE MultiParamTypeClasses #-}
5 {-# LANGUAGE OverloadedStrings #-}
6 {-# LANGUAGE ScopedTypeVariables #-}
7 {-# LANGUAGE TypeApplications #-}
8 {-# LANGUAGE ViewPatterns #-}
9 {-# OPTIONS_GHC -fno-warn-orphans #-}
10 -- | Compute an Index for a DTC.
11 module Language.DTC.Index where
13 import Control.Applicative (Applicative(..))
14 import Control.Category
15 import Control.Monad (Monad(..), mapM)
17 import Data.Char (Char)
18 import Data.Default.Class (Default(..))
19 import Data.Foldable (Foldable(..), concat)
20 import Data.Function (($), const)
21 import Data.Functor ((<$>))
22 import Data.Map.Strict (Map)
23 import Data.Maybe (Maybe(..), maybe)
24 import Data.Monoid (Monoid(..))
25 import Data.Semigroup (Semigroup(..))
26 import Data.Sequence ((|>))
27 import Data.Text (Text)
28 import Data.Traversable (Traversable(..))
29 import Data.TreeMap.Strict (TreeMap(..))
30 import Data.TreeSeq.Strict (Tree(..), Trees)
31 import Prelude (Num(..))
32 import Text.Show (Show(..))
33 import qualified Control.Monad.Trans.State as S
34 import qualified Data.Char as Char
35 import qualified Data.List as List
36 import qualified Data.Map.Strict as Map
37 import qualified Data.Sequence as Seq
38 import qualified Data.Strict.Maybe as Strict
39 import qualified Data.Text as Text
40 import qualified Data.TreeMap.Strict as TreeMap
41 import qualified Data.TreeSeq.Strict as Tree
43 import Language.DTC.Document (Count,Words,Terms, Word, WordOrSpace(..), Words)
44 import Language.XML (XmlPos(..))
45 import qualified Language.DTC.Document as DTC
47 -- import Debug.Trace (trace)
49 termsByChar :: Terms -> Map Char Terms
51 foldr (\aliases acc ->
53 (Word w:_):_ | not (Text.null w) ->
55 (Char.toUpper $ Text.index w 0)
69 type Path = TreeMap.Path Word
72 type Refs = TreeMap Word [Ref]
74 refsOfTerms :: Terms -> Refs
75 refsOfTerms = TreeMap.fromList const . (>>= f) . concat
78 f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws
80 pathFromWords :: Words -> Maybe Path
82 case ws >>= unSpace of
83 p:ps | not (Text.null p) -> Just (TreeMap.path p ps)
95 , state_section :: XmlPos
101 , state_section = def
104 -- * Class 'Indexify'
105 class Indexify a where
106 indexify :: a -> S.State State a
107 instance Indexify (Tree k a) => Indexify [Tree k a] where
108 indexify = mapM indexify
109 instance Indexify (Tree DTC.BodyKey DTC.BodyValue) where
111 Tree0 v -> Tree0 <$> indexify v
114 DTC.Section{..} -> do
115 before@State{state_section} <- S.get
116 S.put before{state_section = pos}
117 t <- TreeN <$> indexify k <*> indexify v
119 S.put after{state_section}
121 instance Indexify (Trees DTC.BodyKey DTC.BodyValue) where
122 indexify = mapM indexify
124 instance Indexify a => Indexify (Seq a) where
125 indexify = mapM indexify
127 instance Indexify DTC.BodyKey where
130 DTC.Section pos attrs
133 instance Indexify DTC.BodyValue where
135 d@DTC.ToC{} -> pure d
136 d@DTC.ToF{} -> pure d
137 d@DTC.Index{} -> pure d
139 DTC.Figure pos attrs type_
143 DTC.Block <$> indexify v
144 instance Indexify [DTC.Reference] where
145 indexify = mapM indexify
146 instance Indexify [DTC.Block] where
147 indexify = mapM indexify
148 instance Indexify [[DTC.Block]] where
149 indexify = mapM (mapM indexify)
150 instance Indexify DTC.Title where
151 indexify (DTC.Title t) = DTC.Title <$> indexify t
152 instance Indexify DTC.Block where
154 DTC.Para{..} -> DTC.Para pos attrs <$> indexify lines
155 DTC.OL{..} -> DTC.OL pos attrs <$> indexify items
156 DTC.UL{..} -> DTC.UL pos attrs <$> indexify items
157 DTC.RL{..} -> DTC.RL pos attrs <$> indexify refs
158 DTC.Artwork{..} -> DTC.Artwork pos attrs <$> indexify art
159 d@DTC.Comment{} -> pure d
161 instance Indexify DTC.Lines where
163 Tree.joinTrees <$> traverse (traverse go) ls
166 DTC.BR -> pure $ Seq.singleton $ Tree0 DTC.BR
169 let (refs,ret) = indexifyWords state_section state_refs (wordify p)
170 S.modify $ \s -> s{state_refs=refs}
172 instance Indexify DTC.Reference where
174 instance Indexify DTC.Artwork where
177 wordify :: Text -> Words
178 wordify = List.reverse . go []
180 go :: Words -> Text -> Words
182 case Text.span Char.isAlphaNum t of
184 case Text.span Char.isSpace t of
186 case Text.uncons t of
188 Just (c,r) -> go (Word (Text.singleton c) : acc) r
189 (_s,r) -> go (Space : acc) r
190 (w,r) -> go (Word w : acc) r
192 plainifyWord :: WordOrSpace -> Text
197 plainifyWords :: Words -> Text
198 plainifyWords = Text.concat . (plainifyWord <$>)
200 indexifyWords :: XmlPos -> Refs -> Words -> (Refs, DTC.Lines)
201 indexifyWords section = go mempty
203 go :: DTC.Lines -> Refs -> Words -> (Refs, DTC.Lines)
208 go (acc |> Tree0 (DTC.Plain " ")) refs next
210 case goWords [] refs [] inp of
211 Nothing -> go (acc |> Tree0 (DTC.Plain w)) refs next
212 Just (Ref{term,count}, ls, ns, rs) ->
213 let lines = Seq.fromList $ Tree0 . DTC.Plain . plainifyWord <$> List.reverse ls in
214 go (acc |> TreeN DTC.Iref{term, count} lines) rs ns
218 Maybe (Ref, Words, Words, Refs)
219 goWords ws m@(TreeMap refsByWord) prev inp =
222 curr@Space : next -> goWords (curr:ws) m (curr:prev) next
223 curr@(Word w) : next ->
224 let words = curr:ws in
225 case Map.lookup w refsByWord of
227 Just nod@TreeMap.Node{..} ->
230 if null node_descendants
232 else case goWords words node_descendants (curr:prev) next of
234 Just (ref, ls, ns, rs) ->
235 Just (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)
237 case goWords words node_descendants (curr:prev) next of
239 let term = List.reverse words in
240 let count = case refs of [] -> 1; Ref{count=c}:_ -> c + 1 in
241 let ref = Ref{term, count, section} in
242 Just (ref, curr:prev, next, TreeMap $ Map.insert w nod{TreeMap.node_value = Strict.Just $ ref:refs} refsByWord)
243 Just (ref, ls, ns, rs) ->
244 Just (ref, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} refsByWord)