2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE TypeSynonymInstances #-}
15 module Gargantext.Core.Viz.Phylo.LevelMaker
18 import Control.Parallel.Strategies
19 import Control.Lens hiding (both, Level)
20 import Data.List ((++), sort, concat, nub, last, null)
21 import Data.Map (Map, (!), empty, singleton, size)
22 import Data.Text (Text)
23 import Data.Tuple.Extra
24 import Data.Vector (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.Metrics
28 import Gargantext.Core.Viz.Phylo.Aggregates
29 import Gargantext.Core.Viz.Phylo.Cluster
30 import Gargantext.Core.Viz.Phylo.BranchMaker
31 import Gargantext.Core.Viz.Phylo.LinkMaker
32 import Gargantext.Core.Viz.Phylo.Tools
33 import Gargantext.Core.Text.Context (TermList)
35 import qualified Data.Vector.Storable as VS
36 import qualified Data.Set as Set
37 import qualified Data.Vector as Vector
39 import Debug.Trace (trace)
40 import Numeric.Statistics (percentile)
43 -------------------------
44 -- | PhyloLevelMaker | --
45 -------------------------
47 -- | A typeClass for polymorphic PhyloLevel functions
48 class PhyloLevelMaker aggregate
50 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
51 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
52 -- | To create a list of PhyloGroups based on a list of aggregates a
53 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
56 instance PhyloLevelMaker PhyloCluster
58 --------------------------------------
59 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
61 | lvl > 1 = addPhyloLevel' lvl m p
62 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
63 --------------------------------------
64 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
65 toPhyloGroups lvl (d,d') l m p =
66 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
67 clusters' = clusters `using` parList rdeepseq
69 --------------------------------------
72 instance PhyloLevelMaker PhyloFis
74 --------------------------------------
75 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
77 | lvl == 1 = addPhyloLevel' lvl m p
78 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
79 --------------------------------------
80 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
81 toPhyloGroups lvl (d,d') l _ p =
82 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
83 groups' = groups `using` parList rdeepseq
85 --------------------------------------
88 instance PhyloLevelMaker Document
90 --------------------------------------
91 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
93 | lvl == 0 = addPhyloLevel' lvl m p
94 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
95 --------------------------------------
96 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
97 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
100 --------------------------------------
103 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
104 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
105 addPhyloLevel' lvl m p = alterPhyloPeriods
106 (\period -> let pId = _phylo_periodId period
107 in over phylo_periodLevels
109 let groups = toPhyloGroups lvl pId (m ! pId) m p
110 in trace (show (length groups)
113 $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
118 ----------------------
119 -- | toPhyloGroup | --
120 ----------------------
123 -- | To transform a Clique into a PhyloGroup
124 cliqueToGroup :: PhyloPeriodId
129 -> Map Date (Map (Int,Int) Double)
132 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
133 (getNgramsMeta cooc ngrams)
135 (singleton "support" (fromIntegral $ getSupport fis))
140 --------------------------------------
141 cooc :: Map (Int, Int) Double
142 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
143 --------------------------------------
145 ngrams = sort $ map (\x -> getIdxInRoots' x root)
148 --------------------------------------
150 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
151 --------------------------------------
154 -- | To transform a Cluster into a Phylogroup
155 clusterToGroup :: PhyloPeriodId
160 -> Map (Date,Date) [PhyloCluster]
163 clusterToGroup prd lvl idx lbl groups _m p =
164 PhyloGroup ((prd, lvl), idx) lbl ngrams
165 (getNgramsMeta cooc ngrams)
170 ascLink desLink [] childs
172 --------------------------------------
173 cooc :: Map (Int, Int) Double
174 cooc = getMiniCooc (listToFullCombi ngrams)
175 (periodsToYears [prd] )
177 --------------------------------------
179 childs = map (\g -> (getGroupId g, 1)) groups
180 ascLink = concat $ map getGroupPeriodParents groups
181 desLink = concat $ map getGroupPeriodChilds groups
182 --------------------------------------
184 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
185 --------------------------------------
188 -- | To transform a list of Ngrams into a PhyloGroup
189 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
190 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
191 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
195 ----------------------
196 -- | toPhyloLevel | --
197 ----------------------
200 -- | To reconstruct the Phylo from a set of Document to a given Level
201 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
202 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
204 --------------------------------------
206 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
207 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
208 --------------------------------------
210 -- phylo0 = tracePhyloN 0
211 -- $ addPhyloLevel 0 phyloDocs phyloBase
212 --------------------------------------
213 phyloDocs :: Map (Date, Date) [Document]
214 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
215 --------------------------------------
217 phyloBase = tracePhyloBase
218 $ toPhyloBase q init c termList fis
220 init = initPhyloParam (Just defaultPhyloVersion)
221 (Just defaultSoftware )
223 ---------------------------------------
226 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
227 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
228 toNthLevel lvlMax prox clus p
230 | otherwise = toNthLevel lvlMax prox clus
231 $ traceBranches (lvl + 1)
232 $ setPhyloBranches (lvl + 1)
233 -- \$ transposePeriodLinks (lvl + 1)
234 $ traceTranspose (lvl + 1) Descendant
235 $ transposeLinks (lvl + 1) Descendant
236 $ traceTranspose (lvl + 1) Ascendant
237 $ transposeLinks (lvl + 1) Ascendant
238 $ tracePhyloN (lvl + 1)
239 $ setLevelLinks (lvl, lvl + 1)
240 $ addPhyloLevel (lvl + 1) (clusters) p
242 --------------------------------------
243 clusters :: Map (Date,Date) [PhyloCluster]
244 clusters = phyloToClusters lvl clus p
245 --------------------------------------
248 --------------------------------------
251 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
252 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
253 toPhylo1 clus prox d p = case clus of
254 Fis (FisParams k s t) -> traceBranches 1
255 -- \$ reLinkPhyloBranches 1
256 -- \$ traceBranches 1
258 $ traceTempoMatching Descendant 1
259 $ interTempoMatching Descendant 1 prox
260 $ traceTempoMatching Ascendant 1
261 $ interTempoMatching Ascendant 1 prox
263 -- \$ setLevelLinks (0,1)
264 $ addPhyloLevel 1 (getPhyloFis phyloFis)
265 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
267 --------------------------------------
269 phyloFis = if (null $ getPhyloFis p)
270 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
271 else p & phylo_fis .~ docsToFis d p
272 --------------------------------------
274 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
277 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
278 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
279 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
281 --------------------------------------
282 cooc :: Map Date (Map (Int,Int) Double)
283 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
284 --------------------------------------
285 nbDocs :: Map Date Double
286 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
287 --------------------------------------
288 foundations :: PhyloFoundations
289 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
290 --------------------------------------
291 periods :: [(Date,Date)]
292 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
293 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
294 --------------------------------------
302 tracePhyloN :: Level -> Phylo -> Phylo
303 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
304 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
306 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
307 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
310 tracePhyloBase :: Phylo -> Phylo
311 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
312 <> show (length $ _phylo_periods p) <> " periods from "
313 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
315 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
317 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
320 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
321 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
322 <> "count : " <> show (length pts) <> " pointers\n") p
324 --------------------------------------
326 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
327 --------------------------------------
330 traceBranches :: Level -> Phylo -> Phylo
331 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
332 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
333 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
334 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
335 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
336 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
337 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
339 --------------------------------------
341 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
342 $ filter (\(id,_) -> (fst id) == lvl)
343 $ getGroupsByBranches p
344 --------------------------------------