2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last)
25 import Data.Map (Map, (!), empty, singleton)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Aggregates.Cluster
31 import Gargantext.Viz.Phylo.Aggregates.Document
32 import Gargantext.Viz.Phylo.Aggregates.Fis
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Viz.Phylo.Aggregates.Cooc
37 import Gargantext.Text.Context (TermList)
39 import qualified Data.Vector.Storable as VS
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
56 instance PhyloLevelMaker PhyloCluster
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
61 | lvl > 1 = toPhyloLevel lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p =
66 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
67 clusters' = clusters `using` parList rdeepseq
69 --------------------------------------
72 instance PhyloLevelMaker PhyloFis
74 --------------------------------------
75 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
77 | lvl == 1 = toPhyloLevel lvl m p
78 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
79 --------------------------------------
80 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
81 toPhyloGroups lvl (d,d') l _ p =
82 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
83 groups' = groups `using` parList rdeepseq
85 --------------------------------------
88 instance PhyloLevelMaker Document
90 --------------------------------------
91 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
93 | lvl == 0 = toPhyloLevel lvl m p
94 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
95 --------------------------------------
96 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
97 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
100 --------------------------------------
103 -- | To transform a Cluster into a Phylogroup
104 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
105 clusterToGroup prd lvl idx lbl groups _m p =
106 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
108 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
109 ascLink desLink [] childs
111 --------------------------------------
113 childs = map (\g -> (getGroupId g, 1)) groups
114 ascLink = concat $ map getGroupPeriodParents groups
115 desLink = concat $ map getGroupPeriodChilds groups
116 --------------------------------------
118 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
119 --------------------------------------
122 -- | To transform a Clique into a PhyloGroup
123 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
124 cliqueToGroup prd lvl idx lbl fis p =
125 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
126 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
129 --------------------------------------
131 ngrams = sort $ map (\x -> getIdxInRoots x p)
134 --------------------------------------
136 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
137 --------------------------------------
140 -- | To transform a list of Ngrams into a PhyloGroup
141 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
142 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
143 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
147 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
148 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
149 toPhyloLevel lvl m p = alterPhyloPeriods
150 (\period -> let pId = _phylo_periodId period
151 in over (phylo_periodLevels)
153 let groups = toPhyloGroups lvl pId (m ! pId) m p
154 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
158 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
159 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
160 toNthLevel lvlMax prox clus p
162 | otherwise = toNthLevel lvlMax prox clus
163 $ traceBranches (lvl + 1)
164 $ setPhyloBranches (lvl + 1)
165 $ transposePeriodLinks (lvl + 1)
166 $ tracePhyloN (lvl + 1)
167 $ setLevelLinks (lvl, lvl + 1)
168 $ addPhyloLevel (lvl + 1)
171 --------------------------------------
172 clusters :: Map (Date,Date) [PhyloCluster]
173 clusters = phyloToClusters lvl clus p
174 --------------------------------------
177 --------------------------------------
180 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
181 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
182 toPhylo1 clus prox d p = case clus of
183 Fis (FisParams k s t) -> traceReBranches 1
184 -- $ linkPhyloBranches 1 prox
187 $ traceTempoMatching Descendant 1
188 $ interTempoMatching Descendant 1 prox
189 $ traceTempoMatching Ascendant 1
190 $ interTempoMatching Ascendant 1 prox
192 $ setLevelLinks (0,1)
193 $ addPhyloLevel 1 phyloFis phylo'
195 --------------------------------------
196 phyloFis :: Map (Date, Date) [PhyloFis]
197 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
198 --------------------------------------
200 phylo' = docsToFis' d p
201 --------------------------------------
203 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
206 -- | To reconstruct the Level 0 of a Phylo
207 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
208 toPhylo0 d p = addPhyloLevel 0 d p
211 class PhyloMaker corpus
213 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
214 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
215 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
217 instance PhyloMaker [(Date, Text)]
219 --------------------------------------
220 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
222 --------------------------------------
224 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
225 --------------------------------------
227 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
228 --------------------------------------
229 phyloDocs :: Map (Date, Date) [Document]
230 phyloDocs = corpusToDocs c phyloBase
231 --------------------------------------
233 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
234 --------------------------------------
235 --------------------------------------
236 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
238 --------------------------------------
239 cooc :: Map Date (Map (Int,Int) Double)
240 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
241 --------------------------------------
242 nbDocs :: Map Date Double
244 --------------------------------------
245 foundations :: PhyloFoundations
246 foundations = PhyloFoundations (initFoundationsRoots roots) termList
247 --------------------------------------
248 periods :: [(Date,Date)]
249 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
250 $ both fst (head' "LevelMaker" c,last c)
251 --------------------------------------
252 --------------------------------------
253 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
256 instance PhyloMaker [Document]
258 --------------------------------------
259 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
261 --------------------------------------
263 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
264 --------------------------------------
266 phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
267 --------------------------------------
268 phyloDocs :: Map (Date, Date) [Document]
269 phyloDocs = corpusToDocs c phyloBase
270 --------------------------------------
272 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
273 --------------------------------------
274 --------------------------------------
275 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
277 --------------------------------------
278 cooc :: Map Date (Map (Int,Int) Double)
279 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
280 --------------------------------------
281 nbDocs :: Map Date Double
282 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
283 --------------------------------------
284 foundations :: PhyloFoundations
285 foundations = PhyloFoundations (initFoundationsRoots roots) termList
286 --------------------------------------
287 periods :: [(Date,Date)]
288 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
289 $ both date (head' "LevelMaker" c,last c)
290 --------------------------------------
291 --------------------------------------
292 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
300 tracePhylo0 :: Phylo -> Phylo
301 tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n"
302 <> show (length $ getGroupsWithLevel 0 p) <> " groups created \n") p
304 tracePhylo1 :: Phylo -> Phylo
305 tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n"
306 <> show (length $ getGroupsWithLevel 1 p) <> " groups created \n") p
308 tracePhyloN :: Level -> Phylo -> Phylo
309 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
310 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
313 tracePhyloBase :: Phylo -> Phylo
314 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
315 <> show (length $ _phylo_periods p) <> " periods from "
316 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
318 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
320 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
323 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
324 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
325 <> "count : " <> show (length pts) <> " pointers\n"
326 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
327 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
328 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
329 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
331 --------------------------------------
333 sim = sort $ map snd pts
334 --------------------------------------
336 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
337 --------------------------------------
340 traceReBranches :: Level -> Phylo -> Phylo
341 traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
342 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
343 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
344 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
345 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
346 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
347 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
349 --------------------------------------
351 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
352 $ filter (\(id,_) -> (fst id) == lvl)
353 $ getGroupsByBranches p
354 --------------------------------------
357 traceBranches :: Level -> Phylo -> Phylo
358 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
359 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
360 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
361 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
362 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
363 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
364 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
366 --------------------------------------
368 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
369 $ filter (\(id,_) -> (fst id) == lvl)
370 $ getGroupsByBranches p
371 --------------------------------------