]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
Merge branch 'dev' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext into...
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last)
25 import Data.Map (Map, (!), empty, singleton)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Aggregates.Cluster
31 import Gargantext.Viz.Phylo.Aggregates.Document
32 import Gargantext.Viz.Phylo.Aggregates.Fis
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Viz.Phylo.Aggregates.Cooc
37 import Gargantext.Text.Context (TermList)
38
39 import qualified Data.Vector.Storable as VS
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
42
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
45
46
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
49 where
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
54
55
56 instance PhyloLevelMaker PhyloCluster
57 where
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
60 addPhyloLevel lvl m p
61 | lvl > 1 = toPhyloLevel lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p =
66 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
67 clusters' = clusters `using` parList rdeepseq
68 in clusters'
69 --------------------------------------
70
71
72 instance PhyloLevelMaker PhyloFis
73 where
74 --------------------------------------
75 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
76 addPhyloLevel lvl m p
77 | lvl == 1 = toPhyloLevel lvl m p
78 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
79 --------------------------------------
80 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
81 toPhyloGroups lvl (d,d') l _ p =
82 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
83 groups' = groups `using` parList rdeepseq
84 in groups'
85 --------------------------------------
86
87
88 instance PhyloLevelMaker Document
89 where
90 --------------------------------------
91 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
92 addPhyloLevel lvl m p
93 | lvl == 0 = toPhyloLevel lvl m p
94 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
95 --------------------------------------
96 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
97 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
98 $ (nub . concat)
99 $ map text l
100 --------------------------------------
101
102
103 -- | To transform a Cluster into a Phylogroup
104 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
105 clusterToGroup prd lvl idx lbl groups _m p =
106 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
107 Nothing
108 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
109 ascLink desLink [] childs
110 where
111 --------------------------------------
112 childs :: [Pointer]
113 childs = map (\g -> (getGroupId g, 1)) groups
114 ascLink = concat $ map getGroupPeriodParents groups
115 desLink = concat $ map getGroupPeriodChilds groups
116 --------------------------------------
117 ngrams :: [Int]
118 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
119 --------------------------------------
120
121
122 -- | To transform a Clique into a PhyloGroup
123 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
124 cliqueToGroup prd lvl idx lbl fis p =
125 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
126 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
127 [] [] [] childs
128 where
129 --------------------------------------
130 ngrams :: [Int]
131 ngrams = sort $ map (\x -> getIdxInRoots x p)
132 $ Set.toList
133 $ getClique fis
134 --------------------------------------
135 childs :: [Pointer]
136 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
137 --------------------------------------
138
139
140 -- | To transform a list of Ngrams into a PhyloGroup
141 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
142 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
143 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
144 [] [] [] []
145
146
147 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
148 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
149 toPhyloLevel lvl m p = alterPhyloPeriods
150 (\period -> let pId = _phylo_periodId period
151 in over (phylo_periodLevels)
152 (\phyloLevels ->
153 let groups = toPhyloGroups lvl pId (m ! pId) m p
154 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
155 ) period) p
156
157
158 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
159 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
160 toNthLevel lvlMax prox clus p
161 | lvl >= lvlMax = p
162 | otherwise = toNthLevel lvlMax prox clus
163 $ traceBranches (lvl + 1)
164 $ setPhyloBranches (lvl + 1)
165 $ traceTranspose (lvl + 1)
166 $ transposePeriodLinks (lvl + 1)
167 $ tracePhyloN (lvl + 1)
168 $ setLevelLinks (lvl, lvl + 1)
169 $ addPhyloLevel (lvl + 1)
170 (clusters) p
171 where
172 --------------------------------------
173 clusters :: Map (Date,Date) [PhyloCluster]
174 clusters = phyloToClusters lvl clus p
175 --------------------------------------
176 lvl :: Level
177 lvl = getLastLevel p
178 --------------------------------------
179
180
181 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
182 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
183 toPhylo1 clus prox d p = case clus of
184 Fis (FisParams k s t) -> traceReBranches 1
185 -- $ reLinkPhyloBranches 1
186 $ traceBranches 1
187 $ setPhyloBranches 1
188 $ traceTempoMatching Descendant 1
189 $ interTempoMatching Descendant 1 prox
190 $ traceTempoMatching Ascendant 1
191 $ interTempoMatching Ascendant 1 prox
192 $ tracePhylo1
193 $ setLevelLinks (0,1)
194 $ addPhyloLevel 1 phyloFis phylo'
195 where
196 --------------------------------------
197 phyloFis :: Map (Date, Date) [PhyloFis]
198 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
199 --------------------------------------
200 phylo' :: Phylo
201 phylo' = docsToFis' d p
202 --------------------------------------
203
204 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
205
206
207 -- | To reconstruct the Level 0 of a Phylo
208 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
209 toPhylo0 d p = addPhyloLevel 0 d p
210
211
212 class PhyloMaker corpus
213 where
214 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
215 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
216 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
217
218 instance PhyloMaker [(Date, Text)]
219 where
220 --------------------------------------
221 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
222 where
223 --------------------------------------
224 phylo1 :: Phylo
225 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
226 --------------------------------------
227 phylo0 :: Phylo
228 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
229 --------------------------------------
230 phyloDocs :: Map (Date, Date) [Document]
231 phyloDocs = corpusToDocs c phyloBase
232 --------------------------------------
233 phyloBase :: Phylo
234 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
235 --------------------------------------
236 --------------------------------------
237 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
238 where
239 --------------------------------------
240 cooc :: Map Date (Map (Int,Int) Double)
241 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
242 --------------------------------------
243 nbDocs :: Map Date Double
244 nbDocs = countDocs c
245 --------------------------------------
246 foundations :: PhyloFoundations
247 foundations = PhyloFoundations (initFoundationsRoots roots) termList
248 --------------------------------------
249 periods :: [(Date,Date)]
250 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
251 $ both fst (head' "LevelMaker" c,last c)
252 --------------------------------------
253 --------------------------------------
254 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
255
256
257 instance PhyloMaker [Document]
258 where
259 --------------------------------------
260 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
261 where
262 --------------------------------------
263 phylo1 :: Phylo
264 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
265 --------------------------------------
266 phylo0 :: Phylo
267 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
268 --------------------------------------
269 phyloDocs :: Map (Date, Date) [Document]
270 phyloDocs = corpusToDocs c phyloBase
271 --------------------------------------
272 phyloBase :: Phylo
273 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
274 --------------------------------------
275 --------------------------------------
276 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
277 where
278 --------------------------------------
279 cooc :: Map Date (Map (Int,Int) Double)
280 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
281 --------------------------------------
282 nbDocs :: Map Date Double
283 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
284 --------------------------------------
285 foundations :: PhyloFoundations
286 foundations = PhyloFoundations (initFoundationsRoots roots) termList
287 --------------------------------------
288 periods :: [(Date,Date)]
289 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
290 $ both date (head' "LevelMaker" c,last c)
291 --------------------------------------
292 --------------------------------------
293 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
294
295
296 -----------------
297 -- | Tracers | --
298 -----------------
299
300
301 tracePhylo0 :: Phylo -> Phylo
302 tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n"
303 <> show (length $ getGroupsWithLevel 0 p) <> " groups created \n") p
304
305 tracePhylo1 :: Phylo -> Phylo
306 tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n"
307 <> show (length $ getGroupsWithLevel 1 p) <> " groups created \n") p
308
309 tracePhyloN :: Level -> Phylo -> Phylo
310 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
311 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
312
313
314 traceTranspose :: Level -> Phylo -> Phylo
315 traceTranspose lvl p = trace ("----\nTranspose "
316 <> show (length $ getGroupsWithLevel lvl p) <> " groups in Phylo "
317 <> show (lvl) <> "\n") p
318
319
320 tracePhyloBase :: Phylo -> Phylo
321 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
322 <> show (length $ _phylo_periods p) <> " periods from "
323 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
324 <> " to "
325 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
326 <> "\n"
327 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
328
329
330 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
331 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
332 <> "count : " <> show (length pts) <> " pointers\n"
333 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
334 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
335 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
336 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
337 where
338 --------------------------------------
339 sim :: [Double]
340 sim = sort $ map snd pts
341 --------------------------------------
342 pts :: [Pointer]
343 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
344 --------------------------------------
345
346
347 traceReBranches :: Level -> Phylo -> Phylo
348 traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
349 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
350 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
351 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
352 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
353 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
354 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
355 where
356 --------------------------------------
357 brs :: [Double]
358 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
359 $ filter (\(id,_) -> (fst id) == lvl)
360 $ getGroupsByBranches p
361 --------------------------------------
362
363
364 traceBranches :: Level -> Phylo -> Phylo
365 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
366 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
367 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
368 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
369 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
370 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
371 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
372 where
373 --------------------------------------
374 brs :: [Double]
375 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
376 $ filter (\(id,_) -> (fst id) == lvl)
377 $ getGroupsByBranches p
378 --------------------------------------