2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last, null)
25 import Data.Map (Map, (!), empty, singleton)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Metrics
31 import Gargantext.Viz.Phylo.Aggregates
32 import Gargantext.Viz.Phylo.Cluster
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Text.Context (TermList)
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
46 -------------------------
47 -- | PhyloLevelMaker | --
48 -------------------------
51 -- | A typeClass for polymorphic PhyloLevel functions
52 class PhyloLevelMaker aggregate
54 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
55 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
56 -- | To create a list of PhyloGroups based on a list of aggregates a
57 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
60 instance PhyloLevelMaker PhyloCluster
62 --------------------------------------
63 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
65 | lvl > 1 = addPhyloLevel' lvl m p
66 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
67 --------------------------------------
68 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
69 toPhyloGroups lvl (d,d') l m p =
70 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
71 clusters' = clusters `using` parList rdeepseq
73 --------------------------------------
76 instance PhyloLevelMaker PhyloFis
78 --------------------------------------
79 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
81 | lvl == 1 = addPhyloLevel' lvl m p
82 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
83 --------------------------------------
84 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
85 toPhyloGroups lvl (d,d') l _ p =
86 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
87 groups' = groups `using` parList rdeepseq
89 --------------------------------------
92 instance PhyloLevelMaker Document
94 --------------------------------------
95 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
97 | lvl == 0 = addPhyloLevel' lvl m p
98 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
99 --------------------------------------
100 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
101 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
104 --------------------------------------
107 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
108 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
109 addPhyloLevel' lvl m p = alterPhyloPeriods
110 (\period -> let pId = _phylo_periodId period
111 in over (phylo_periodLevels)
113 let groups = toPhyloGroups lvl pId (m ! pId) m p
114 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
118 ----------------------
119 -- | toPhyloGroup | --
120 ----------------------
123 -- | To transform a Clique into a PhyloGroup
124 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
125 cliqueToGroup prd lvl idx lbl fis p = PhyloGroup ((prd, lvl), idx) lbl ngrams
126 (getNgramsMeta cooc ngrams)
128 (singleton "support" (fromIntegral $ getSupport fis))
133 --------------------------------------
134 cooc :: Map (Int, Int) Double
135 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
136 --------------------------------------
138 ngrams = sort $ map (\x -> getIdxInRoots x p)
141 --------------------------------------
143 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
144 --------------------------------------
147 -- | To transform a Cluster into a Phylogroup
148 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
149 clusterToGroup prd lvl idx lbl groups _m p =
150 PhyloGroup ((prd, lvl), idx) lbl ngrams
151 (getNgramsMeta cooc ngrams)
156 ascLink desLink [] childs
158 --------------------------------------
159 cooc :: Map (Int, Int) Double
160 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
161 --------------------------------------
163 childs = map (\g -> (getGroupId g, 1)) groups
164 ascLink = concat $ map getGroupPeriodParents groups
165 desLink = concat $ map getGroupPeriodChilds groups
166 --------------------------------------
168 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
169 --------------------------------------
172 -- | To transform a list of Ngrams into a PhyloGroup
173 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
174 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
175 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
179 ----------------------
180 -- | toPhyloLevel | --
181 ----------------------
184 -- | To reconstruct the Phylo from a set of Document to a given Level
185 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
186 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
188 --------------------------------------
190 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
191 -- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
192 --------------------------------------
194 -- phylo0 = tracePhyloN 0
195 -- $ addPhyloLevel 0 phyloDocs phyloBase
196 --------------------------------------
197 phyloDocs :: Map (Date, Date) [Document]
198 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
199 --------------------------------------
201 phyloBase = tracePhyloBase
202 $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
203 --------------------------------------
206 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
207 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
208 toNthLevel lvlMax prox clus p
210 | otherwise = toNthLevel lvlMax prox clus
211 $ traceBranches (lvl + 1)
212 $ setPhyloBranches (lvl + 1)
213 $ transposePeriodLinks (lvl + 1)
214 $ tracePhyloN (lvl + 1)
215 $ setLevelLinks (lvl, lvl + 1)
216 $ addPhyloLevel (lvl + 1)
219 --------------------------------------
220 clusters :: Map (Date,Date) [PhyloCluster]
221 clusters = phyloToClusters lvl clus p
222 --------------------------------------
225 --------------------------------------
228 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
229 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
230 toPhylo1 clus prox d p = case clus of
231 Fis (FisParams k s t) -> traceBranches 1
232 -- $ reLinkPhyloBranches 1
235 $ traceTempoMatching Descendant 1
236 $ interTempoMatching Descendant 1 prox
237 $ traceTempoMatching Ascendant 1
238 $ interTempoMatching Ascendant 1 prox
240 -- $ setLevelLinks (0,1)
241 $ addPhyloLevel 1 (getPhyloFis phyloFis) phyloFis
243 --------------------------------------
245 phyloFis = if (null $ getPhyloFis p)
246 then p & phylo_fis .~ refineFis (docsToFis d p) k s t
247 else p & phylo_fis .~ docsToFis d p
248 --------------------------------------
250 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
253 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
254 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
255 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
257 --------------------------------------
258 cooc :: Map Date (Map (Int,Int) Double)
259 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
260 --------------------------------------
261 nbDocs :: Map Date Double
262 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
263 --------------------------------------
264 foundations :: PhyloFoundations
265 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
266 --------------------------------------
267 periods :: [(Date,Date)]
268 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
269 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
270 --------------------------------------
278 tracePhyloN :: Level -> Phylo -> Phylo
279 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
280 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
283 tracePhyloBase :: Phylo -> Phylo
284 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
285 <> show (length $ _phylo_periods p) <> " periods from "
286 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
288 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
290 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
293 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
294 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
295 <> "count : " <> show (length pts) <> " pointers\n") p
297 --------------------------------------
299 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
300 --------------------------------------
303 traceBranches :: Level -> Phylo -> Phylo
304 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
305 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
306 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
307 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
308 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
309 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
310 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
312 --------------------------------------
314 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
315 $ filter (\(id,_) -> (fst id) == lvl)
316 $ getGroupsByBranches p
317 --------------------------------------