2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last)
25 import Data.Map (Map, (!), empty, singleton)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Aggregates.Cluster
31 import Gargantext.Viz.Phylo.Aggregates.Document
32 import Gargantext.Viz.Phylo.Aggregates.Fis
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Viz.Phylo.Aggregates.Cooc
37 import Gargantext.Text.Context (TermList)
39 import qualified Data.Vector.Storable as VS
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
56 instance PhyloLevelMaker PhyloCluster
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
61 | lvl > 1 = toPhyloLevel lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p =
66 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
67 clusters' = clusters `using` parList rdeepseq
69 --------------------------------------
72 instance PhyloLevelMaker PhyloFis
74 --------------------------------------
75 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
77 | lvl == 1 = toPhyloLevel lvl m p
78 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
79 --------------------------------------
80 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
81 toPhyloGroups lvl (d,d') l _ p =
82 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
83 groups' = groups `using` parList rdeepseq
85 --------------------------------------
88 instance PhyloLevelMaker Document
90 --------------------------------------
91 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
93 | lvl == 0 = toPhyloLevel lvl m p
94 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
95 --------------------------------------
96 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
97 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
100 --------------------------------------
103 -- | To transform a Cluster into a Phylogroup
104 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
105 clusterToGroup prd lvl idx lbl groups _m p =
106 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
108 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
109 ascLink desLink [] childs
111 --------------------------------------
113 childs = map (\g -> (getGroupId g, 1)) groups
114 ascLink = concat $ map getGroupPeriodParents groups
115 desLink = concat $ map getGroupPeriodChilds groups
116 --------------------------------------
118 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
119 --------------------------------------
122 -- | To transform a Clique into a PhyloGroup
123 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
124 cliqueToGroup prd lvl idx lbl fis p =
125 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
126 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
129 --------------------------------------
131 ngrams = sort $ map (\x -> getIdxInRoots x p)
134 --------------------------------------
136 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
137 --------------------------------------
140 -- | To transform a list of Ngrams into a PhyloGroup
141 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
142 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
143 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
147 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
148 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
149 toPhyloLevel lvl m p = alterPhyloPeriods
150 (\period -> let pId = _phylo_periodId period
151 in over (phylo_periodLevels)
153 let groups = toPhyloGroups lvl pId (m ! pId) m p
154 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
158 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
159 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
160 toNthLevel lvlMax prox clus p
162 | otherwise = toNthLevel lvlMax prox clus
163 $ traceBranches (lvl + 1)
164 $ setPhyloBranches (lvl + 1)
165 $ traceTranspose (lvl + 1)
166 $ transposePeriodLinks (lvl + 1)
167 $ tracePhyloN (lvl + 1)
168 $ setLevelLinks (lvl, lvl + 1)
169 $ addPhyloLevel (lvl + 1)
172 --------------------------------------
173 clusters :: Map (Date,Date) [PhyloCluster]
174 clusters = phyloToClusters lvl clus p
175 --------------------------------------
178 --------------------------------------
181 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
182 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
183 toPhylo1 clus prox d p = case clus of
184 Fis (FisParams k s t) -> traceReBranches 1
185 -- $ reLinkPhyloBranches 1
188 $ traceTempoMatching Descendant 1
189 $ interTempoMatching Descendant 1 prox
190 $ traceTempoMatching Ascendant 1
191 $ interTempoMatching Ascendant 1 prox
193 $ setLevelLinks (0,1)
194 $ addPhyloLevel 1 phyloFis phylo'
196 --------------------------------------
197 phyloFis :: Map (Date, Date) [PhyloFis]
198 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
199 --------------------------------------
201 phylo' = docsToFis' d p
202 --------------------------------------
204 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
207 -- | To reconstruct the Level 0 of a Phylo
208 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
209 toPhylo0 d p = addPhyloLevel 0 d p
212 class PhyloMaker corpus
214 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
215 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
216 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
218 instance PhyloMaker [(Date, Text)]
220 --------------------------------------
221 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
223 --------------------------------------
225 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
226 --------------------------------------
228 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
229 --------------------------------------
230 phyloDocs :: Map (Date, Date) [Document]
231 phyloDocs = corpusToDocs c phyloBase
232 --------------------------------------
234 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
235 --------------------------------------
236 --------------------------------------
237 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
239 --------------------------------------
240 cooc :: Map Date (Map (Int,Int) Double)
241 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
242 --------------------------------------
243 nbDocs :: Map Date Double
245 --------------------------------------
246 foundations :: PhyloFoundations
247 foundations = PhyloFoundations (initFoundationsRoots roots) termList
248 --------------------------------------
249 periods :: [(Date,Date)]
250 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
251 $ both fst (head' "LevelMaker" c,last c)
252 --------------------------------------
253 --------------------------------------
254 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
257 instance PhyloMaker [Document]
259 --------------------------------------
260 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
262 --------------------------------------
264 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
265 --------------------------------------
267 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
268 --------------------------------------
269 phyloDocs :: Map (Date, Date) [Document]
270 phyloDocs = corpusToDocs c phyloBase
271 --------------------------------------
273 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
274 --------------------------------------
275 --------------------------------------
276 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
278 --------------------------------------
279 cooc :: Map Date (Map (Int,Int) Double)
280 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
281 --------------------------------------
282 nbDocs :: Map Date Double
283 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
284 --------------------------------------
285 foundations :: PhyloFoundations
286 foundations = PhyloFoundations (initFoundationsRoots roots) termList
287 --------------------------------------
288 periods :: [(Date,Date)]
289 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
290 $ both date (head' "LevelMaker" c,last c)
291 --------------------------------------
292 --------------------------------------
293 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
301 tracePhylo0 :: Phylo -> Phylo
302 tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n"
303 <> show (length $ getGroupsWithLevel 0 p) <> " groups created \n") p
305 tracePhylo1 :: Phylo -> Phylo
306 tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n"
307 <> show (length $ getGroupsWithLevel 1 p) <> " groups created \n") p
309 tracePhyloN :: Level -> Phylo -> Phylo
310 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
311 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
314 traceTranspose :: Level -> Phylo -> Phylo
315 traceTranspose lvl p = trace ("----\nTranspose "
316 <> show (length $ getGroupsWithLevel lvl p) <> " groups in Phylo "
317 <> show (lvl) <> "\n") p
320 tracePhyloBase :: Phylo -> Phylo
321 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
322 <> show (length $ _phylo_periods p) <> " periods from "
323 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
325 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
327 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
330 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
331 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
332 <> "count : " <> show (length pts) <> " pointers\n"
333 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
334 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
335 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
336 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
338 --------------------------------------
340 sim = sort $ map snd pts
341 --------------------------------------
343 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
344 --------------------------------------
347 traceReBranches :: Level -> Phylo -> Phylo
348 traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
349 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
350 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
351 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
352 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
353 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
354 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
356 --------------------------------------
358 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
359 $ filter (\(id,_) -> (fst id) == lvl)
360 $ getGroupsByBranches p
361 --------------------------------------
364 traceBranches :: Level -> Phylo -> Phylo
365 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
366 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
367 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
368 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
369 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
370 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
371 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
373 --------------------------------------
375 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
376 $ filter (\(id,_) -> (fst id) == lvl)
377 $ getGroupsByBranches p
378 --------------------------------------