]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/LevelMaker.hs
process the ancestors before the synchronic clustering
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE TypeSynonymInstances #-}
14
15 module Gargantext.Core.Viz.Phylo.LevelMaker
16 where
17
18 import Control.Parallel.Strategies
19 import Control.Lens hiding (both, Level)
20 import Data.List ((++), sort, concat, nub, last, null)
21 import Data.Map (Map, (!), empty, singleton, size)
22 import Data.Text (Text)
23 import Data.Tuple.Extra
24 import Data.Vector (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.Metrics
28 import Gargantext.Core.Viz.Phylo.Aggregates
29 import Gargantext.Core.Viz.Phylo.Cluster
30 import Gargantext.Core.Viz.Phylo.BranchMaker
31 import Gargantext.Core.Viz.Phylo.LinkMaker
32 import Gargantext.Core.Viz.Phylo.Tools
33 import Gargantext.Core.Text.Context (TermList)
34
35 import qualified Data.Vector.Storable as VS
36 import qualified Data.Set as Set
37 import qualified Data.Vector as Vector
38
39 import Debug.Trace (trace)
40 import Numeric.Statistics (percentile)
41
42
43 -------------------------
44 -- | PhyloLevelMaker | --
45 -------------------------
46
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
49 where
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
54
55
56 instance PhyloLevelMaker PhyloCluster
57 where
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
60 addPhyloLevel lvl m p
61 | lvl > 1 = addPhyloLevel' lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p =
66 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
67 clusters' = clusters `using` parList rdeepseq
68 in clusters'
69 --------------------------------------
70
71
72 instance PhyloLevelMaker PhyloFis
73 where
74 --------------------------------------
75 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
76 addPhyloLevel lvl m p
77 | lvl == 1 = addPhyloLevel' lvl m p
78 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
79 --------------------------------------
80 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
81 toPhyloGroups lvl (d,d') l _ p =
82 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
83 groups' = groups `using` parList rdeepseq
84 in groups'
85 --------------------------------------
86
87
88 instance PhyloLevelMaker Document
89 where
90 --------------------------------------
91 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
92 addPhyloLevel lvl m p
93 | lvl == 0 = addPhyloLevel' lvl m p
94 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
95 --------------------------------------
96 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
97 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
98 $ (nub . concat)
99 $ map text l
100 --------------------------------------
101
102
103 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
104 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
105 addPhyloLevel' lvl m p = alterPhyloPeriods
106 (\period -> let pId = _phylo_periodId period
107 in over phylo_periodLevels
108 (\phyloLevels ->
109 let groups = toPhyloGroups lvl pId (m ! pId) m p
110 in trace (show (length groups)
111 <> " groups for "
112 <> show (pId) )
113 $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
114 ) period
115 ) p
116
117
118 ----------------------
119 -- | toPhyloGroup | --
120 ----------------------
121
122
123 -- | To transform a Clique into a PhyloGroup
124 cliqueToGroup :: PhyloPeriodId
125 -> Level
126 -> Int
127 -> Text
128 -> PhyloFis
129 -> Map Date (Map (Int,Int) Double)
130 -> Vector Ngrams
131 -> PhyloGroup
132 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
133 (getNgramsMeta cooc ngrams)
134 -- empty
135 (singleton "support" (fromIntegral $ getSupport fis))
136 Nothing
137 cooc
138 [] [] [] childs
139 where
140 --------------------------------------
141 cooc :: Map (Int, Int) Double
142 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
143 --------------------------------------
144 ngrams :: [Int]
145 ngrams = sort $ map (\x -> getIdxInRoots' x root)
146 $ Set.toList
147 $ getClique fis
148 --------------------------------------
149 childs :: [Pointer]
150 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
151 --------------------------------------
152
153
154 -- | To transform a Cluster into a Phylogroup
155 clusterToGroup :: PhyloPeriodId
156 -> Level
157 -> Int
158 -> Text
159 -> PhyloCluster
160 -> Map (Date,Date) [PhyloCluster]
161 -> Phylo
162 -> PhyloGroup
163 clusterToGroup prd lvl idx lbl groups _m p =
164 PhyloGroup ((prd, lvl), idx) lbl ngrams
165 (getNgramsMeta cooc ngrams)
166 -- empty
167 empty
168 Nothing
169 cooc
170 ascLink desLink [] childs
171 where
172 --------------------------------------
173 cooc :: Map (Int, Int) Double
174 cooc = getMiniCooc (listToFullCombi ngrams)
175 (periodsToYears [prd] )
176 (getPhyloCooc p )
177 --------------------------------------
178 childs :: [Pointer]
179 childs = map (\g -> (getGroupId g, 1)) groups
180 ascLink = concat $ map getGroupPeriodParents groups
181 desLink = concat $ map getGroupPeriodChilds groups
182 --------------------------------------
183 ngrams :: [Int]
184 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
185 --------------------------------------
186
187
188 -- | To transform a list of Ngrams into a PhyloGroup
189 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
190 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
191 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
192 [] [] [] []
193
194
195 ----------------------
196 -- | toPhyloLevel | --
197 ----------------------
198
199
200 -- | To reconstruct the Phylo from a set of Document to a given Level
201 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
202 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
203 where
204 --------------------------------------
205 phylo1 :: Phylo
206 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
207 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
208 --------------------------------------
209 -- phylo0 :: Phylo
210 -- phylo0 = tracePhyloN 0
211 -- $ addPhyloLevel 0 phyloDocs phyloBase
212 --------------------------------------
213 phyloDocs :: Map (Date, Date) [Document]
214 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
215 --------------------------------------
216 phyloBase :: Phylo
217 phyloBase = tracePhyloBase
218 $ toPhyloBase q init c termList fis
219 where
220 init = initPhyloParam (Just defaultPhyloVersion)
221 (Just defaultSoftware )
222 (Just q )
223 ---------------------------------------
224
225
226 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
227 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
228 toNthLevel lvlMax prox clus p
229 | lvl >= lvlMax = p
230 | otherwise = toNthLevel lvlMax prox clus
231 $ traceBranches (lvl + 1)
232 $ setPhyloBranches (lvl + 1)
233 -- \$ transposePeriodLinks (lvl + 1)
234 $ traceTranspose (lvl + 1) Descendant
235 $ transposeLinks (lvl + 1) Descendant
236 $ traceTranspose (lvl + 1) Ascendant
237 $ transposeLinks (lvl + 1) Ascendant
238 $ tracePhyloN (lvl + 1)
239 $ setLevelLinks (lvl, lvl + 1)
240 $ addPhyloLevel (lvl + 1) (clusters) p
241 where
242 --------------------------------------
243 clusters :: Map (Date,Date) [PhyloCluster]
244 clusters = phyloToClusters lvl clus p
245 --------------------------------------
246 lvl :: Level
247 lvl = getLastLevel p
248 --------------------------------------
249
250
251 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
252 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
253 toPhylo1 clus prox d p = case clus of
254 Fis (FisParams k s t) -> traceBranches 1
255 -- \$ reLinkPhyloBranches 1
256 -- \$ traceBranches 1
257 $ setPhyloBranches 1
258 $ traceTempoMatching Descendant 1
259 $ interTempoMatching Descendant 1 prox
260 $ traceTempoMatching Ascendant 1
261 $ interTempoMatching Ascendant 1 prox
262 $ tracePhyloN 1
263 -- \$ setLevelLinks (0,1)
264 $ addPhyloLevel 1 (getPhyloFis phyloFis)
265 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
266 where
267 --------------------------------------
268 phyloFis :: Phylo
269 phyloFis = if (null $ getPhyloFis p)
270 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
271 else p & phylo_fis .~ docsToFis d p
272 --------------------------------------
273
274 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
275
276
277 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
278 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
279 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
280 where
281 --------------------------------------
282 cooc :: Map Date (Map (Int,Int) Double)
283 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
284 --------------------------------------
285 nbDocs :: Map Date Double
286 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
287 --------------------------------------
288 foundations :: PhyloFoundations
289 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
290 --------------------------------------
291 periods :: [(Date,Date)]
292 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
293 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
294 --------------------------------------
295
296
297 -----------------
298 -- | Tracers | --
299 -----------------
300
301
302 tracePhyloN :: Level -> Phylo -> Phylo
303 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
304 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
305
306 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
307 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
308
309
310 tracePhyloBase :: Phylo -> Phylo
311 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
312 <> show (length $ _phylo_periods p) <> " periods from "
313 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
314 <> " to "
315 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
316 <> "\n"
317 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
318
319
320 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
321 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
322 <> "count : " <> show (length pts) <> " pointers\n") p
323 where
324 --------------------------------------
325 pts :: [Pointer]
326 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
327 --------------------------------------
328
329
330 traceBranches :: Level -> Phylo -> Phylo
331 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
332 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
333 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
334 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
335 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
336 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
337 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
338 where
339 --------------------------------------
340 brs :: [Double]
341 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
342 $ filter (\(id,_) -> (fst id) == lvl)
343 $ getGroupsByBranches p
344 --------------------------------------