]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
add parallelism
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last)
25 import Data.Map (Map, (!), empty, singleton)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Aggregates.Cluster
31 import Gargantext.Viz.Phylo.Aggregates.Document
32 import Gargantext.Viz.Phylo.Aggregates.Fis
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Viz.Phylo.Aggregates.Cooc
37 import Gargantext.Text.Context (TermList)
38
39 import qualified Data.Vector.Storable as VS
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
42
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
45
46
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
49 where
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
54
55
56 instance PhyloLevelMaker PhyloCluster
57 where
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
60 addPhyloLevel lvl m p
61 | lvl > 1 = toPhyloLevel lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p =
66 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
67 clusters' = clusters `using` parList rdeepseq
68 in clusters'
69 --------------------------------------
70
71
72 instance PhyloLevelMaker PhyloFis
73 where
74 --------------------------------------
75 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
76 addPhyloLevel lvl m p
77 | lvl == 1 = toPhyloLevel lvl m p
78 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
79 --------------------------------------
80 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
81 toPhyloGroups lvl (d,d') l _ p =
82 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
83 groups' = groups `using` parList rdeepseq
84 in groups'
85 --------------------------------------
86
87
88 instance PhyloLevelMaker Document
89 where
90 --------------------------------------
91 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
92 addPhyloLevel lvl m p
93 | lvl == 0 = toPhyloLevel lvl m p
94 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
95 --------------------------------------
96 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
97 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
98 $ (nub . concat)
99 $ map text l
100 --------------------------------------
101
102
103 -- | To transform a Cluster into a Phylogroup
104 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
105 clusterToGroup prd lvl idx lbl groups _m p =
106 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
107 Nothing
108 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
109 ascLink desLink [] childs
110 where
111 --------------------------------------
112 childs :: [Pointer]
113 childs = map (\g -> (getGroupId g, 1)) groups
114 ascLink = concat $ map getGroupPeriodParents groups
115 desLink = concat $ map getGroupPeriodChilds groups
116 --------------------------------------
117 ngrams :: [Int]
118 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
119 --------------------------------------
120
121
122 -- | To transform a Clique into a PhyloGroup
123 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
124 cliqueToGroup prd lvl idx lbl fis p =
125 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
126 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
127 [] [] [] childs
128 where
129 --------------------------------------
130 ngrams :: [Int]
131 ngrams = sort $ map (\x -> getIdxInRoots x p)
132 $ Set.toList
133 $ getClique fis
134 --------------------------------------
135 childs :: [Pointer]
136 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
137 --------------------------------------
138
139
140 -- | To transform a list of Ngrams into a PhyloGroup
141 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
142 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
143 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
144 [] [] [] []
145
146
147 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
148 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
149 toPhyloLevel lvl m p = alterPhyloPeriods
150 (\period -> let pId = _phylo_periodId period
151 in over (phylo_periodLevels)
152 (\phyloLevels ->
153 let groups = toPhyloGroups lvl pId (m ! pId) m p
154 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
155 ) period) p
156
157
158 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
159 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
160 toNthLevel lvlMax prox clus p
161 | lvl >= lvlMax = p
162 | otherwise = toNthLevel lvlMax prox clus
163 $ traceBranches (lvl + 1)
164 $ setPhyloBranches (lvl + 1)
165 $ transposePeriodLinks (lvl + 1)
166 $ tracePhyloN (lvl + 1)
167 $ setLevelLinks (lvl, lvl + 1)
168 $ addPhyloLevel (lvl + 1)
169 (clusters) p
170 where
171 --------------------------------------
172 clusters :: Map (Date,Date) [PhyloCluster]
173 clusters = phyloToClusters lvl clus p
174 --------------------------------------
175 lvl :: Level
176 lvl = getLastLevel p
177 --------------------------------------
178
179
180 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
181 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
182 toPhylo1 clus prox d p = case clus of
183 Fis (FisParams k s t) -> traceReBranches 1
184 -- $ linkPhyloBranches 1 prox
185 $ traceBranches 1
186 $ setPhyloBranches 1
187 $ traceTempoMatching Descendant 1
188 $ interTempoMatching Descendant 1 prox
189 $ traceTempoMatching Ascendant 1
190 $ interTempoMatching Ascendant 1 prox
191 $ tracePhylo1
192 $ setLevelLinks (0,1)
193 $ addPhyloLevel 1 phyloFis phylo'
194 where
195 --------------------------------------
196 phyloFis :: Map (Date, Date) [PhyloFis]
197 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
198 --------------------------------------
199 phylo' :: Phylo
200 phylo' = docsToFis' d p
201 --------------------------------------
202
203 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
204
205
206 -- | To reconstruct the Level 0 of a Phylo
207 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
208 toPhylo0 d p = addPhyloLevel 0 d p
209
210
211 class PhyloMaker corpus
212 where
213 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
214 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
215 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
216
217 instance PhyloMaker [(Date, Text)]
218 where
219 --------------------------------------
220 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
221 where
222 --------------------------------------
223 phylo1 :: Phylo
224 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
225 --------------------------------------
226 phylo0 :: Phylo
227 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
228 --------------------------------------
229 phyloDocs :: Map (Date, Date) [Document]
230 phyloDocs = corpusToDocs c phyloBase
231 --------------------------------------
232 phyloBase :: Phylo
233 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
234 --------------------------------------
235 --------------------------------------
236 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
237 where
238 --------------------------------------
239 cooc :: Map Date (Map (Int,Int) Double)
240 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
241 --------------------------------------
242 nbDocs :: Map Date Double
243 nbDocs = countDocs c
244 --------------------------------------
245 foundations :: PhyloFoundations
246 foundations = PhyloFoundations (initFoundationsRoots roots) termList
247 --------------------------------------
248 periods :: [(Date,Date)]
249 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
250 $ both fst (head' "LevelMaker" c,last c)
251 --------------------------------------
252 --------------------------------------
253 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
254
255
256 instance PhyloMaker [Document]
257 where
258 --------------------------------------
259 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
260 where
261 --------------------------------------
262 phylo1 :: Phylo
263 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
264 --------------------------------------
265 phylo0 :: Phylo
266 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
267 --------------------------------------
268 phyloDocs :: Map (Date, Date) [Document]
269 phyloDocs = corpusToDocs c phyloBase
270 --------------------------------------
271 phyloBase :: Phylo
272 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
273 --------------------------------------
274 --------------------------------------
275 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
276 where
277 --------------------------------------
278 cooc :: Map Date (Map (Int,Int) Double)
279 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
280 --------------------------------------
281 nbDocs :: Map Date Double
282 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
283 --------------------------------------
284 foundations :: PhyloFoundations
285 foundations = PhyloFoundations (initFoundationsRoots roots) termList
286 --------------------------------------
287 periods :: [(Date,Date)]
288 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
289 $ both date (head' "LevelMaker" c,last c)
290 --------------------------------------
291 --------------------------------------
292 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
293
294
295 -----------------
296 -- | Tracers | --
297 -----------------
298
299
300 tracePhylo0 :: Phylo -> Phylo
301 tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n"
302 <> show (length $ getGroupsWithLevel 0 p) <> " groups created \n") p
303
304 tracePhylo1 :: Phylo -> Phylo
305 tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n"
306 <> show (length $ getGroupsWithLevel 1 p) <> " groups created \n") p
307
308 tracePhyloN :: Level -> Phylo -> Phylo
309 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
310 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
311
312
313 tracePhyloBase :: Phylo -> Phylo
314 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
315 <> show (length $ _phylo_periods p) <> " periods from "
316 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
317 <> " to "
318 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
319 <> "\n"
320 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
321
322
323 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
324 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
325 <> "count : " <> show (length pts) <> " pointers\n"
326 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
327 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
328 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
329 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
330 where
331 --------------------------------------
332 sim :: [Double]
333 sim = sort $ map snd pts
334 --------------------------------------
335 pts :: [Pointer]
336 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
337 --------------------------------------
338
339
340 traceReBranches :: Level -> Phylo -> Phylo
341 traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
342 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
343 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
344 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
345 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
346 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
347 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
348 where
349 --------------------------------------
350 brs :: [Double]
351 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
352 $ filter (\(id,_) -> (fst id) == lvl)
353 $ getGroupsByBranches p
354 --------------------------------------
355
356
357 traceBranches :: Level -> Phylo -> Phylo
358 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
359 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
360 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
361 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
362 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
363 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
364 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
365 where
366 --------------------------------------
367 brs :: [Double]
368 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
369 $ filter (\(id,_) -> (fst id) == lvl)
370 $ getGroupsByBranches p
371 --------------------------------------