2 Module : Gargantext.Core.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE TypeSynonymInstances #-}
15 module Gargantext.Core.Viz.Phylo.LevelMaker
18 import Control.Parallel.Strategies
19 import Control.Lens hiding (both, Level)
20 import Data.List ((++), sort, concat, nub, last, null)
21 import Data.Map (Map, (!), empty, singleton, size)
22 import Data.Text (Text)
23 import Data.Tuple.Extra
24 import Data.Vector (Vector)
25 import Gargantext.Prelude
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.Metrics
28 import Gargantext.Core.Viz.Phylo.Aggregates
29 import Gargantext.Core.Viz.Phylo.Cluster
30 import Gargantext.Core.Viz.Phylo.BranchMaker
31 import Gargantext.Core.Viz.Phylo.LinkMaker
32 import Gargantext.Core.Viz.Phylo.Tools
33 import Gargantext.Core.Text.Context (TermList)
35 import qualified Data.Vector.Storable as VS
36 import qualified Data.Set as Set
37 import qualified Data.Vector as Vector
39 import Debug.Trace (trace)
40 import Numeric.Statistics (percentile)
43 -------------------------
44 -- | PhyloLevelMaker | --
45 -------------------------
48 -- | A typeClass for polymorphic PhyloLevel functions
49 class PhyloLevelMaker aggregate
51 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
52 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
53 -- | To create a list of PhyloGroups based on a list of aggregates a
54 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
57 instance PhyloLevelMaker PhyloCluster
59 --------------------------------------
60 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
62 | lvl > 1 = addPhyloLevel' lvl m p
63 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
64 --------------------------------------
65 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
66 toPhyloGroups lvl (d,d') l m p =
67 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
68 clusters' = clusters `using` parList rdeepseq
70 --------------------------------------
73 instance PhyloLevelMaker PhyloFis
75 --------------------------------------
76 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
78 | lvl == 1 = addPhyloLevel' lvl m p
79 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
80 --------------------------------------
81 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
82 toPhyloGroups lvl (d,d') l _ p =
83 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
84 groups' = groups `using` parList rdeepseq
86 --------------------------------------
89 instance PhyloLevelMaker Document
91 --------------------------------------
92 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
94 | lvl == 0 = addPhyloLevel' lvl m p
95 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
96 --------------------------------------
97 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
98 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
101 --------------------------------------
104 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
105 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
106 addPhyloLevel' lvl m p = alterPhyloPeriods
107 (\period -> let pId = _phylo_periodId period
108 in over (phylo_periodLevels)
110 let groups = toPhyloGroups lvl pId (m ! pId) m p
111 in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
115 ----------------------
116 -- | toPhyloGroup | --
117 ----------------------
120 -- | To transform a Clique into a PhyloGroup
121 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
122 cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
123 (getNgramsMeta cooc ngrams)
125 (singleton "support" (fromIntegral $ getSupport fis))
130 --------------------------------------
131 cooc :: Map (Int, Int) Double
132 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
133 --------------------------------------
135 ngrams = sort $ map (\x -> getIdxInRoots' x root)
138 --------------------------------------
140 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
141 --------------------------------------
144 -- | To transform a Cluster into a Phylogroup
145 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
146 clusterToGroup prd lvl idx lbl groups _m p =
147 PhyloGroup ((prd, lvl), idx) lbl ngrams
148 (getNgramsMeta cooc ngrams)
153 ascLink desLink [] childs
155 --------------------------------------
156 cooc :: Map (Int, Int) Double
157 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
158 --------------------------------------
160 childs = map (\g -> (getGroupId g, 1)) groups
161 ascLink = concat $ map getGroupPeriodParents groups
162 desLink = concat $ map getGroupPeriodChilds groups
163 --------------------------------------
165 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
166 --------------------------------------
169 -- | To transform a list of Ngrams into a PhyloGroup
170 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
171 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
172 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
176 ----------------------
177 -- | toPhyloLevel | --
178 ----------------------
181 -- | To reconstruct the Phylo from a set of Document to a given Level
182 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
183 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
185 --------------------------------------
187 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
188 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
189 --------------------------------------
191 -- phylo0 = tracePhyloN 0
192 -- $ addPhyloLevel 0 phyloDocs phyloBase
193 --------------------------------------
194 phyloDocs :: Map (Date, Date) [Document]
195 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
196 --------------------------------------
198 phyloBase = tracePhyloBase
199 $ toPhyloBase q init c termList fis
201 init = initPhyloParam (Just defaultPhyloVersion)
202 (Just defaultSoftware )
204 ---------------------------------------
207 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
208 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
209 toNthLevel lvlMax prox clus p
211 | otherwise = toNthLevel lvlMax prox clus
212 $ traceBranches (lvl + 1)
213 $ setPhyloBranches (lvl + 1)
214 -- \$ transposePeriodLinks (lvl + 1)
215 $ traceTranspose (lvl + 1) Descendant
216 $ transposeLinks (lvl + 1) Descendant
217 $ traceTranspose (lvl + 1) Ascendant
218 $ transposeLinks (lvl + 1) Ascendant
219 $ tracePhyloN (lvl + 1)
220 $ setLevelLinks (lvl, lvl + 1)
221 $ addPhyloLevel (lvl + 1) (clusters) p
223 --------------------------------------
224 clusters :: Map (Date,Date) [PhyloCluster]
225 clusters = phyloToClusters lvl clus p
226 --------------------------------------
229 --------------------------------------
232 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
233 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
234 toPhylo1 clus prox d p = case clus of
235 Fis (FisParams k s t) -> traceBranches 1
236 -- \$ reLinkPhyloBranches 1
237 -- \$ traceBranches 1
239 $ traceTempoMatching Descendant 1
240 $ interTempoMatching Descendant 1 prox
241 $ traceTempoMatching Ascendant 1
242 $ interTempoMatching Ascendant 1 prox
244 -- \$ setLevelLinks (0,1)
245 $ addPhyloLevel 1 (getPhyloFis phyloFis)
246 $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
248 --------------------------------------
250 phyloFis = if (null $ getPhyloFis p)
251 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
252 else p & phylo_fis .~ docsToFis d p
253 --------------------------------------
255 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
258 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
259 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
260 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
262 --------------------------------------
263 cooc :: Map Date (Map (Int,Int) Double)
264 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
265 --------------------------------------
266 nbDocs :: Map Date Double
267 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
268 --------------------------------------
269 foundations :: PhyloFoundations
270 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
271 --------------------------------------
272 periods :: [(Date,Date)]
273 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
274 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
275 --------------------------------------
283 tracePhyloN :: Level -> Phylo -> Phylo
284 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
285 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
287 traceTranspose :: Level -> Filiation -> Phylo -> Phylo
288 traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
291 tracePhyloBase :: Phylo -> Phylo
292 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
293 <> show (length $ _phylo_periods p) <> " periods from "
294 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
296 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
298 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
301 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
302 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
303 <> "count : " <> show (length pts) <> " pointers\n") p
305 --------------------------------------
307 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
308 --------------------------------------
311 traceBranches :: Level -> Phylo -> Phylo
312 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
313 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
314 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
315 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
316 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
317 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
318 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
320 --------------------------------------
322 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
323 $ filter (\(id,_) -> (fst id) == lvl)
324 $ getGroupsByBranches p
325 --------------------------------------