2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE TypeSynonymInstances #-}
15 module Gargantext.Core.Viz.Phylo.LevelMaker
18 import Control.Parallel.Strategies
19 import Control.Lens hiding (both, Level)
20 import Data.List ((++), sort, concat, nub, zip, last, null)
21 import Data.Map (Map, (!), empty, singleton, size)
22 import Data.Text (Text)
23 import Data.Tuple.Extra
24 import Data.Vector (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.Metrics
28 import Gargantext.Core.Viz.Phylo.Aggregates
29 import Gargantext.Core.Viz.Phylo.Cluster
30 import Gargantext.Core.Viz.Phylo.BranchMaker
31 import Gargantext.Core.Viz.Phylo.LinkMaker
32 import Gargantext.Core.Viz.Phylo.Tools
33 import Gargantext.Core.Text.Context (TermList)
35 import qualified Data.Vector.Storable as VS
36 import qualified Data.Set as Set
37 import qualified Data.Vector as Vector
39 import Debug.Trace (trace)
40 import Numeric.Statistics (percentile)
43 -------------------------
44 -- | PhyloLevelMaker | --
45 -------------------------
48 -- | A typeClass for polymorphic PhyloLevel functions
49 class PhyloLevelMaker aggregate
51 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
52 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
53 -- | To create a list of PhyloGroups based on a list of aggregates a
54 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
57 instance PhyloLevelMaker PhyloCluster
59 --------------------------------------
60 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
62 | lvl > 1 = addPhyloLevel' lvl m p
63 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
64 --------------------------------------
65 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
66 toPhyloGroups lvl (d,d') l m p =
67 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
68 clusters' = clusters `using` parList rdeepseq
70 --------------------------------------
73 instance PhyloLevelMaker PhyloFis
75 --------------------------------------
76 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
78 | lvl == 1 = addPhyloLevel' lvl m p
79 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
80 --------------------------------------
81 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
82 toPhyloGroups lvl (d,d') l _ p =
83 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
84 groups' = groups `using` parList rdeepseq
86 --------------------------------------
89 instance PhyloLevelMaker Document
91 --------------------------------------
92 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
94 | lvl == 0 = addPhyloLevel' lvl m p
95 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
96 --------------------------------------
97 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
98 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
101 --------------------------------------
104 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
105 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
106 addPhyloLevel' lvl m p = alterPhyloPeriods
107 (\period -> let pId = _phylo_periodId period
108 in over (phylo_periodLevels)
110 let groups = toPhyloGroups lvl pId (m ! pId) m p
111 in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
115 ----------------------
116 -- | toPhyloGroup | --
117 ----------------------
120 -- | To transform a Clique into a PhyloGroup
121 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
122 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
123 (getNgramsMeta cooc ngrams)
125 (singleton "support" (fromIntegral $ getSupport fis))
130 --------------------------------------
131 cooc :: Map (Int, Int) Double
132 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
133 --------------------------------------
135 ngrams = sort $ map (\x -> getIdxInRoots' x root)
138 --------------------------------------
140 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
141 --------------------------------------
144 -- | To transform a Cluster into a Phylogroup
145 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
146 clusterToGroup prd lvl idx lbl groups _m p =
147 PhyloGroup ((prd, lvl), idx) lbl ngrams
148 (getNgramsMeta cooc ngrams)
153 ascLink desLink [] childs
155 --------------------------------------
156 cooc :: Map (Int, Int) Double
157 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
158 --------------------------------------
160 childs = map (\g -> (getGroupId g, 1)) groups
161 ascLink = concat $ map getGroupPeriodParents groups
162 desLink = concat $ map getGroupPeriodChilds groups
163 --------------------------------------
165 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
166 --------------------------------------
169 -- | To transform a list of Ngrams into a PhyloGroup
170 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
171 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
172 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
176 ----------------------
177 -- | toPhyloLevel | --
178 ----------------------
181 -- | To reconstruct the Phylo from a set of Document to a given Level
182 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
183 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
185 --------------------------------------
187 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
188 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
189 --------------------------------------
191 -- phylo0 = tracePhyloN 0
192 -- $ addPhyloLevel 0 phyloDocs phyloBase
193 --------------------------------------
194 phyloDocs :: Map (Date, Date) [Document]
195 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
196 --------------------------------------
198 phyloBase = tracePhyloBase
199 $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
200 --------------------------------------
203 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
204 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
205 toNthLevel lvlMax prox clus p
207 | otherwise = toNthLevel lvlMax prox clus
208 $ traceBranches (lvl + 1)
209 $ setPhyloBranches (lvl + 1)
210 -- \$ transposePeriodLinks (lvl + 1)
211 $ traceTranspose (lvl + 1) Descendant
212 $ transposeLinks (lvl + 1) Descendant
213 $ traceTranspose (lvl + 1) Ascendant
214 $ transposeLinks (lvl + 1) Ascendant
215 $ tracePhyloN (lvl + 1)
216 $ setLevelLinks (lvl, lvl + 1)
217 $ addPhyloLevel (lvl + 1)
220 --------------------------------------
221 clusters :: Map (Date,Date) [PhyloCluster]
222 clusters = phyloToClusters lvl clus p
223 --------------------------------------
226 --------------------------------------
229 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
230 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
231 toPhylo1 clus prox d p = case clus of
232 Fis (FisParams k s t) -> traceBranches 1
233 -- \$ reLinkPhyloBranches 1
234 -- \$ traceBranches 1
236 $ traceTempoMatching Descendant 1
237 $ interTempoMatching Descendant 1 prox
238 $ traceTempoMatching Ascendant 1
239 $ interTempoMatching Ascendant 1 prox
241 -- \$ setLevelLinks (0,1)
242 $ addPhyloLevel 1 (getPhyloFis phyloFis)
243 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
245 --------------------------------------
247 phyloFis = if (null $ getPhyloFis p)
248 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
249 else p & phylo_fis .~ docsToFis d p
250 --------------------------------------
252 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
255 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
256 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
257 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
259 --------------------------------------
260 cooc :: Map Date (Map (Int,Int) Double)
261 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
262 --------------------------------------
263 nbDocs :: Map Date Double
264 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
265 --------------------------------------
266 foundations :: PhyloFoundations
267 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
268 --------------------------------------
269 periods :: [(Date,Date)]
270 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
271 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
272 --------------------------------------
280 tracePhyloN :: Level -> Phylo -> Phylo
281 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
282 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
284 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
285 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
288 tracePhyloBase :: Phylo -> Phylo
289 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
290 <> show (length $ _phylo_periods p) <> " periods from "
291 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
293 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
295 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
298 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
299 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
300 <> "count : " <> show (length pts) <> " pointers\n") p
302 --------------------------------------
304 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
305 --------------------------------------
308 traceBranches :: Level -> Phylo -> Phylo
309 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
310 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
311 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
312 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
313 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
314 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
315 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
317 --------------------------------------
319 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
320 $ filter (\(id,_) -> (fst id) == lvl)
321 $ getGroupsByBranches p
322 --------------------------------------