2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Parallel.Strategies
23 import Control.Lens hiding (both, Level)
24 import Data.List ((++), sort, concat, nub, zip, last)
25 import Data.Map (Map, (!), empty, singleton)
26 import Data.Text (Text)
27 import Data.Tuple.Extra
28 import Gargantext.Prelude
29 import Gargantext.Viz.Phylo
30 import Gargantext.Viz.Phylo.Metrics
31 import Gargantext.Viz.Phylo.Aggregates
32 import Gargantext.Viz.Phylo.Cluster
33 import Gargantext.Viz.Phylo.BranchMaker
34 import Gargantext.Viz.Phylo.LinkMaker
35 import Gargantext.Viz.Phylo.Tools
36 import Gargantext.Text.Context (TermList)
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
46 -------------------------
47 -- | PhyloLevelMaker | --
48 -------------------------
51 -- | A typeClass for polymorphic PhyloLevel functions
52 class PhyloLevelMaker aggregate
54 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
55 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
56 -- | To create a list of PhyloGroups based on a list of aggregates a
57 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
60 instance PhyloLevelMaker PhyloCluster
62 --------------------------------------
63 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
65 | lvl > 1 = addPhyloLevel' lvl m p
66 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
67 --------------------------------------
68 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
69 toPhyloGroups lvl (d,d') l m p =
70 let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
71 clusters' = clusters `using` parList rdeepseq
73 --------------------------------------
76 instance PhyloLevelMaker PhyloFis
78 --------------------------------------
79 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
81 | lvl == 1 = addPhyloLevel' lvl m p
82 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
83 --------------------------------------
84 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
85 toPhyloGroups lvl (d,d') l _ p =
86 let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
87 groups' = groups `using` parList rdeepseq
89 --------------------------------------
92 instance PhyloLevelMaker Document
94 --------------------------------------
95 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
97 | lvl == 0 = addPhyloLevel' lvl m p
98 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
99 --------------------------------------
100 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
101 toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
104 --------------------------------------
107 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
108 addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
109 addPhyloLevel' lvl m p = alterPhyloPeriods
110 (\period -> let pId = _phylo_periodId period
111 in over (phylo_periodLevels)
113 let groups = toPhyloGroups lvl pId (m ! pId) m p
114 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
118 ----------------------
119 -- | toPhyloGroup | --
120 ----------------------
123 -- | To transform a Clique into a PhyloGroup
124 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
125 cliqueToGroup prd lvl idx lbl fis p = PhyloGroup ((prd, lvl), idx) lbl ngrams
126 (getNgramsMeta cooc ngrams)
128 (singleton "support" (fromIntegral $ getSupport fis))
133 --------------------------------------
134 cooc :: Map (Int, Int) Double
135 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
136 --------------------------------------
138 ngrams = sort $ map (\x -> getIdxInRoots x p)
141 --------------------------------------
143 childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
144 --------------------------------------
147 -- | To transform a Cluster into a Phylogroup
148 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
149 clusterToGroup prd lvl idx lbl groups _m p =
150 PhyloGroup ((prd, lvl), idx) lbl ngrams
151 (getNgramsMeta cooc ngrams)
156 ascLink desLink [] childs
158 --------------------------------------
159 cooc :: Map (Int, Int) Double
160 cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
161 --------------------------------------
163 childs = map (\g -> (getGroupId g, 1)) groups
164 ascLink = concat $ map getGroupPeriodParents groups
165 desLink = concat $ map getGroupPeriodChilds groups
166 --------------------------------------
168 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
169 --------------------------------------
172 -- | To transform a list of Ngrams into a PhyloGroup
173 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
174 ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
175 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
179 ----------------------
180 -- | toPhyloLevel | --
181 ----------------------
184 -- | To reconstruct the Phylo from a set of Document to a given Level
185 toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
186 toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
188 --------------------------------------
190 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
191 --------------------------------------
193 phylo0 = tracePhyloN 0
194 $ addPhyloLevel 0 phyloDocs phyloBase
195 --------------------------------------
196 phyloDocs :: Map (Date, Date) [Document]
197 phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
198 --------------------------------------
200 phyloBase = tracePhyloBase
201 $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
202 --------------------------------------
205 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
206 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
207 toNthLevel lvlMax prox clus p
209 | otherwise = toNthLevel lvlMax prox clus
210 $ traceBranches (lvl + 1)
211 $ setPhyloBranches (lvl + 1)
212 $ transposePeriodLinks (lvl + 1)
213 $ tracePhyloN (lvl + 1)
214 $ setLevelLinks (lvl, lvl + 1)
215 $ addPhyloLevel (lvl + 1)
218 --------------------------------------
219 clusters :: Map (Date,Date) [PhyloCluster]
220 clusters = phyloToClusters lvl clus p
221 --------------------------------------
224 --------------------------------------
227 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
228 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
229 toPhylo1 clus prox d p = case clus of
230 Fis (FisParams k s t) -> traceBranches 1
231 -- $ reLinkPhyloBranches 1
234 $ traceTempoMatching Descendant 1
235 $ interTempoMatching Descendant 1 prox
236 $ traceTempoMatching Ascendant 1
237 $ interTempoMatching Ascendant 1 prox
239 $ setLevelLinks (0,1)
240 $ addPhyloLevel 1 phyloFis phylo'
242 --------------------------------------
243 phyloFis :: Map (Date, Date) [PhyloFis]
244 phyloFis = refineFis (getPhyloFis phylo') k s t
245 --------------------------------------
247 phylo' = docsToFis d p
248 --------------------------------------
250 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
253 -- | To create the base of the Phylo (foundations, periods, cooc, etc)
254 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
255 toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
257 --------------------------------------
258 cooc :: Map Date (Map (Int,Int) Double)
259 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
260 --------------------------------------
261 nbDocs :: Map Date Double
262 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
263 --------------------------------------
264 foundations :: PhyloFoundations
265 foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
266 --------------------------------------
267 periods :: [(Date,Date)]
268 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
269 $ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
270 --------------------------------------
278 tracePhyloN :: Level -> Phylo -> Phylo
279 tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
280 <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
283 tracePhyloBase :: Phylo -> Phylo
284 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
285 <> show (length $ _phylo_periods p) <> " periods from "
286 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
288 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
290 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
293 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
294 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
295 <> "count : " <> show (length pts) <> " pointers\n") p
297 --------------------------------------
299 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
300 --------------------------------------
303 traceBranches :: Level -> Phylo -> Phylo
304 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
305 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
306 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
307 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
308 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
309 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
310 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
312 --------------------------------------
314 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
315 $ filter (\(id,_) -> (fst id) == lvl)
316 $ getGroupsByBranches p
317 --------------------------------------