]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
add rebranching to link distante branches
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Viz.Phylo.Aggregates.Cooc
36 import Gargantext.Text.Context (TermList)
37
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
41
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
44
45
46 -- | A typeClass for polymorphic PhyloLevel functions
47 class PhyloLevelMaker aggregate
48 where
49 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
50 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
51 -- | To create a list of PhyloGroups based on a list of aggregates a
52 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
53
54
55 instance PhyloLevelMaker PhyloCluster
56 where
57 --------------------------------------
58 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
59 addPhyloLevel lvl m p
60 | lvl > 1 = toPhyloLevel lvl m p
61 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
62 --------------------------------------
63 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
64 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
65 --------------------------------------
66
67
68 instance PhyloLevelMaker PhyloFis
69 where
70 --------------------------------------
71 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
72 addPhyloLevel lvl m p
73 | lvl == 1 = toPhyloLevel lvl m p
74 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
75 --------------------------------------
76 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
77 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
78 --------------------------------------
79
80
81 instance PhyloLevelMaker Document
82 where
83 --------------------------------------
84 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
85 addPhyloLevel lvl m p
86 | lvl == 0 = toPhyloLevel lvl m p
87 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
88 --------------------------------------
89 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
90 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
91 $ zip [1..]
92 $ (nub . concat)
93 $ map text l
94 --------------------------------------
95
96
97 -- | To transform a Cluster into a Phylogroup
98 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
99 clusterToGroup prd lvl idx lbl groups _m p =
100 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
101 Nothing
102 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
103 [] [] [] childs
104 where
105 --------------------------------------
106 childs :: [Pointer]
107 childs = map (\g -> (getGroupId g, 1)) groups
108 --------------------------------------
109 ngrams :: [Int]
110 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
111 --------------------------------------
112
113
114 -- | To transform a Clique into a PhyloGroup
115 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
116 cliqueToGroup prd lvl idx lbl fis p =
117 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
118 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
119 [] [] [] []
120 where
121 --------------------------------------
122 ngrams :: [Int]
123 ngrams = sort $ map (\x -> getIdxInRoots x p)
124 $ Set.toList
125 $ getClique fis
126 --------------------------------------
127
128
129 -- | To transform a list of Ngrams into a PhyloGroup
130 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
131 ngramsToGroup prd lvl idx lbl ngrams p =
132 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
133 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
134 [] [] [] []
135
136
137 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
138 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
139 toPhyloLevel lvl m p = alterPhyloPeriods
140 (\period -> let pId = _phylo_periodId period
141 in over (phylo_periodLevels)
142 (\phyloLevels ->
143 let groups = toPhyloGroups lvl pId (m ! pId) m p
144 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
145 ) period) p
146
147
148 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
149 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
150 toNthLevel lvlMax prox clus p
151 | lvl >= lvlMax = p
152 | otherwise = toNthLevel lvlMax prox clus
153 $ traceBranches (lvl + 1)
154 $ setPhyloBranches (lvl + 1)
155 $ transposePeriodLinks (lvl + 1)
156 $ setLevelLinks (lvl, lvl + 1)
157 $ addPhyloLevel (lvl + 1)
158 (clusters) p
159 where
160 --------------------------------------
161 clusters :: Map (Date,Date) [PhyloCluster]
162 clusters = phyloToClusters lvl clus p
163 --------------------------------------
164 lvl :: Level
165 lvl = getLastLevel p
166 --------------------------------------
167
168
169 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
170 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
171 toPhylo1 clus prox d p = case clus of
172 Fis (FisParams k s t) -> traceReBranches 1
173 $ linkPhyloBranches 1 prox
174 $ traceBranches 1
175 $ setPhyloBranches 1
176 $ traceTempoMatching Descendant 1
177 $ interTempoMatching Descendant 1 prox
178 $ traceTempoMatching Ascendant 1
179 $ interTempoMatching Ascendant 1 prox
180 $ setLevelLinks (0,1)
181 $ setLevelLinks (1,0)
182 $ addPhyloLevel 1 phyloFis phylo'
183 where
184 --------------------------------------
185 phyloFis :: Map (Date, Date) [PhyloFis]
186 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
187 --------------------------------------
188 phylo' :: Phylo
189 phylo' = docsToFis' d p
190 --------------------------------------
191
192 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
193
194
195 -- | To reconstruct the Level 0 of a Phylo
196 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
197 toPhylo0 d p = addPhyloLevel 0 d p
198
199
200 class PhyloMaker corpus
201 where
202 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
203 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
204 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
205
206 instance PhyloMaker [(Date, Text)]
207 where
208 --------------------------------------
209 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
210 where
211 --------------------------------------
212 phylo1 :: Phylo
213 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
214 --------------------------------------
215 phylo0 :: Phylo
216 phylo0 = toPhylo0 phyloDocs phyloBase
217 --------------------------------------
218 phyloDocs :: Map (Date, Date) [Document]
219 phyloDocs = corpusToDocs c phyloBase
220 --------------------------------------
221 phyloBase :: Phylo
222 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
223 --------------------------------------
224 --------------------------------------
225 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
226 where
227 --------------------------------------
228 cooc :: Map Date (Map (Int,Int) Double)
229 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
230 --------------------------------------
231 nbDocs :: Map Date Double
232 nbDocs = countDocs c
233 --------------------------------------
234 foundations :: PhyloFoundations
235 foundations = PhyloFoundations (initFoundationsRoots roots) termList
236 --------------------------------------
237 periods :: [(Date,Date)]
238 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
239 $ both fst (head' "LevelMaker" c,last c)
240 --------------------------------------
241 --------------------------------------
242 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
243
244
245 instance PhyloMaker [Document]
246 where
247 --------------------------------------
248 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
249 where
250 --------------------------------------
251 phylo1 :: Phylo
252 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
253 --------------------------------------
254 phylo0 :: Phylo
255 phylo0 = toPhylo0 phyloDocs phyloBase
256 --------------------------------------
257 phyloDocs :: Map (Date, Date) [Document]
258 phyloDocs = corpusToDocs c phyloBase
259 --------------------------------------
260 phyloBase :: Phylo
261 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
262 --------------------------------------
263 --------------------------------------
264 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
265 where
266 --------------------------------------
267 cooc :: Map Date (Map (Int,Int) Double)
268 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
269 --------------------------------------
270 nbDocs :: Map Date Double
271 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
272 --------------------------------------
273 foundations :: PhyloFoundations
274 foundations = PhyloFoundations (initFoundationsRoots roots) termList
275 --------------------------------------
276 periods :: [(Date,Date)]
277 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
278 $ both date (head' "LevelMaker" c,last c)
279 --------------------------------------
280 --------------------------------------
281 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
282
283
284 -----------------
285 -- | Tracers | --
286 -----------------
287
288
289 tracePhylo0 :: Phylo -> Phylo
290 tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n") p
291
292 tracePhylo1 :: Phylo -> Phylo
293 tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n") p
294
295 tracePhyloN :: Level -> Phylo -> Phylo
296 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n") p
297
298
299 tracePhyloBase :: Phylo -> Phylo
300 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
301 <> show (length $ _phylo_periods p) <> " periods from "
302 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
303 <> " to "
304 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
305 <> "\n"
306 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
307
308
309 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
310 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
311 <> "count : " <> show (length pts) <> " pointers\n"
312 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
313 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
314 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
315 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
316 where
317 --------------------------------------
318 sim :: [Double]
319 sim = sort $ map snd pts
320 --------------------------------------
321 pts :: [Pointer]
322 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
323 --------------------------------------
324
325
326 traceReBranches :: Level -> Phylo -> Phylo
327 traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
328 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
329 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
330 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
331 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
332 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
333 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
334 where
335 --------------------------------------
336 brs :: [Double]
337 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
338 $ filter (\(id,_) -> (fst id) == lvl)
339 $ getGroupsByBranches p
340 --------------------------------------
341
342
343 traceBranches :: Level -> Phylo -> Phylo
344 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
345 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
346 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
347 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
348 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
349 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
350 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
351 where
352 --------------------------------------
353 brs :: [Double]
354 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
355 $ filter (\(id,_) -> (fst id) == lvl)
356 $ getGroupsByBranches p
357 --------------------------------------