2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last, null)
25 import Data.Map (Map, (!), empty, singleton, size)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Data.Vector (Vector)
29 import Gargantext.Prelude
30 import Gargantext.Viz.Phylo
31 import Gargantext.Viz.Phylo.Metrics
32 import Gargantext.Viz.Phylo.Aggregates
33 import Gargantext.Viz.Phylo.Cluster
34 import Gargantext.Viz.Phylo.BranchMaker
35 import Gargantext.Viz.Phylo.LinkMaker
36 import Gargantext.Viz.Phylo.Tools
37 import Gargantext.Text.Context (TermList)
39 import qualified Data.Vector.Storable as VS
40 import qualified Data.Set as Set
41 import qualified Data.Vector as Vector
43 import Debug.Trace (trace)
44 import Numeric.Statistics (percentile)
47 -------------------------
48 -- | PhyloLevelMaker | --
49 -------------------------
52 -- | A typeClass for polymorphic PhyloLevel functions
53 class PhyloLevelMaker aggregate
55 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
56 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
57 -- | To create a list of PhyloGroups based on a list of aggregates a
58 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
61 instance PhyloLevelMaker PhyloCluster
63 --------------------------------------
64 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
66 | lvl > 1 = addPhyloLevel' lvl m p
67 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
68 --------------------------------------
69 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
70 toPhyloGroups lvl (d,d') l m p =
71 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
72 clusters' = clusters `using` parList rdeepseq
74 --------------------------------------
77 instance PhyloLevelMaker PhyloFis
79 --------------------------------------
80 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
82 | lvl == 1 = addPhyloLevel' lvl m p
83 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
84 --------------------------------------
85 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
86 toPhyloGroups lvl (d,d') l _ p =
87 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
88 groups' = groups `using` parList rdeepseq
90 --------------------------------------
93 instance PhyloLevelMaker Document
95 --------------------------------------
96 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
98 | lvl == 0 = addPhyloLevel' lvl m p
99 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
100 --------------------------------------
101 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
102 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
105 --------------------------------------
108 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
109 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
110 addPhyloLevel' lvl m p = alterPhyloPeriods
111 (\period -> let pId = _phylo_periodId period
112 in over (phylo_periodLevels)
114 let groups = toPhyloGroups lvl pId (m ! pId) m p
115 in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
119 ----------------------
120 -- | toPhyloGroup | --
121 ----------------------
124 -- | To transform a Clique into a PhyloGroup
125 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
126 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
127 (getNgramsMeta cooc ngrams)
129 (singleton "support" (fromIntegral $ getSupport fis))
134 --------------------------------------
135 cooc :: Map (Int, Int) Double
136 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
137 --------------------------------------
139 ngrams = sort $ map (\x -> getIdxInRoots' x root)
142 --------------------------------------
144 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
145 --------------------------------------
148 -- | To transform a Cluster into a Phylogroup
149 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
150 clusterToGroup prd lvl idx lbl groups _m p =
151 PhyloGroup ((prd, lvl), idx) lbl ngrams
152 (getNgramsMeta cooc ngrams)
157 ascLink desLink [] childs
159 --------------------------------------
160 cooc :: Map (Int, Int) Double
161 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
162 --------------------------------------
164 childs = map (\g -> (getGroupId g, 1)) groups
165 ascLink = concat $ map getGroupPeriodParents groups
166 desLink = concat $ map getGroupPeriodChilds groups
167 --------------------------------------
169 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
170 --------------------------------------
173 -- | To transform a list of Ngrams into a PhyloGroup
174 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
175 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
176 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
180 ----------------------
181 -- | toPhyloLevel | --
182 ----------------------
185 -- | To reconstruct the Phylo from a set of Document to a given Level
186 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
187 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
189 --------------------------------------
191 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
192 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
193 --------------------------------------
195 -- phylo0 = tracePhyloN 0
196 -- $ addPhyloLevel 0 phyloDocs phyloBase
197 --------------------------------------
198 phyloDocs :: Map (Date, Date) [Document]
199 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
200 --------------------------------------
202 phyloBase = tracePhyloBase
203 $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
204 --------------------------------------
207 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
208 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
209 toNthLevel lvlMax prox clus p
211 | otherwise = toNthLevel lvlMax prox clus
212 $ traceBranches (lvl + 1)
213 $ setPhyloBranches (lvl + 1)
214 -- $ transposePeriodLinks (lvl + 1)
215 $ traceTranspose (lvl + 1) Descendant
216 $ transposeLinks (lvl + 1) Descendant
217 $ traceTranspose (lvl + 1) Ascendant
218 $ transposeLinks (lvl + 1) Ascendant
219 $ tracePhyloN (lvl + 1)
220 $ setLevelLinks (lvl, lvl + 1)
221 $ addPhyloLevel (lvl + 1)
224 --------------------------------------
225 clusters :: Map (Date,Date) [PhyloCluster]
226 clusters = phyloToClusters lvl clus p
227 --------------------------------------
230 --------------------------------------
233 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
234 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
235 toPhylo1 clus prox d p = case clus of
236 Fis (FisParams k s t) -> traceBranches 1
237 -- $ reLinkPhyloBranches 1
240 $ traceTempoMatching Descendant 1
241 $ interTempoMatching Descendant 1 prox
242 $ traceTempoMatching Ascendant 1
243 $ interTempoMatching Ascendant 1 prox
245 -- $ setLevelLinks (0,1)
246 $ addPhyloLevel 1 (getPhyloFis phyloFis)
247 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
249 --------------------------------------
251 phyloFis = if (null $ getPhyloFis p)
252 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
253 else p & phylo_fis .~ docsToFis d p
254 --------------------------------------
256 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
259 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
260 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
261 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
263 --------------------------------------
264 cooc :: Map Date (Map (Int,Int) Double)
265 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
266 --------------------------------------
267 nbDocs :: Map Date Double
268 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
269 --------------------------------------
270 foundations :: PhyloFoundations
271 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
272 --------------------------------------
273 periods :: [(Date,Date)]
274 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
275 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
276 --------------------------------------
284 tracePhyloN :: Level -> Phylo -> Phylo
285 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
286 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
288 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
289 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
292 tracePhyloBase :: Phylo -> Phylo
293 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
294 <> show (length $ _phylo_periods p) <> " periods from "
295 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
297 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
299 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
302 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
303 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
304 <> "count : " <> show (length pts) <> " pointers\n") p
306 --------------------------------------
308 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
309 --------------------------------------
312 traceBranches :: Level -> Phylo -> Phylo
313 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
314 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
315 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
316 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
317 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
318 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
319 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
321 --------------------------------------
323 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
324 $ filter (\(id,_) -> (fst id) == lvl)
325 $ getGroupsByBranches p
326 --------------------------------------