2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Viz.Phylo.Aggregates.Cooc
36 import Gargantext.Text.Context (TermList)
38 import qualified Data.Vector.Storable as VS
39 import qualified Data.Set as Set
40 import qualified Data.Vector as Vector
42 import Debug.Trace (trace)
43 import Numeric.Statistics (percentile)
46 -- | A typeClass for polymorphic PhyloLevel functions
47 class PhyloLevelMaker aggregate
49 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
50 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
51 -- | To create a list of PhyloGroups based on a list of aggregates a
52 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
55 instance PhyloLevelMaker PhyloCluster
57 --------------------------------------
58 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
60 | lvl > 1 = toPhyloLevel lvl m p
61 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
62 --------------------------------------
63 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
64 toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
65 --------------------------------------
68 instance PhyloLevelMaker PhyloFis
70 --------------------------------------
71 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
73 | lvl == 1 = toPhyloLevel lvl m p
74 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
75 --------------------------------------
76 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
77 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
78 --------------------------------------
81 instance PhyloLevelMaker Document
83 --------------------------------------
84 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
86 | lvl == 0 = toPhyloLevel lvl m p
87 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
88 --------------------------------------
89 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
90 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
94 --------------------------------------
97 -- | To transform a Cluster into a Phylogroup
98 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
99 clusterToGroup prd lvl idx lbl groups _m p =
100 PhyloGroup ((prd, lvl), idx) lbl ngrams empty
102 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
103 [] [] [] (map (\g -> (getGroupId g, 1)) groups)
105 --------------------------------------
107 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
108 --------------------------------------
111 -- | To transform a Clique into a PhyloGroup
112 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
113 cliqueToGroup prd lvl idx lbl fis p =
114 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
115 (getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
118 --------------------------------------
120 ngrams = sort $ map (\x -> getIdxInRoots x p)
123 --------------------------------------
126 -- | To transform a list of Ngrams into a PhyloGroup
127 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
128 ngramsToGroup prd lvl idx lbl ngrams p =
129 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
130 (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
134 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
135 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
136 toPhyloLevel lvl m p = alterPhyloPeriods
137 (\period -> let pId = _phylo_periodId period
138 in over (phylo_periodLevels)
140 let groups = toPhyloGroups lvl pId (m ! pId) m p
141 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
145 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
146 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
147 toNthLevel lvlMax prox clus p
149 | otherwise = toNthLevel lvlMax prox clus
150 $ traceBranches (lvl + 1)
151 $ setPhyloBranches (lvl + 1)
152 $ transposePeriodLinks (lvl + 1)
153 $ setLevelLinks (lvl, lvl + 1)
154 $ addPhyloLevel (lvl + 1)
155 (phyloToClusters lvl clus p) p
157 --------------------------------------
160 --------------------------------------
163 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
164 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
165 toPhylo1 clus prox metrics filters d p = case clus of
166 Fis (FisParams k s t) -> traceBranches 1
168 $ traceTempoMatching Descendant 1
169 $ interTempoMatching Descendant 1 prox
170 $ traceTempoMatching Ascendant 1
171 $ interTempoMatching Ascendant 1 prox
172 $ setLevelLinks (0,1)
173 $ setLevelLinks (1,0)
174 $ addPhyloLevel 1 phyloFis phylo'
176 --------------------------------------
177 phyloFis :: Map (Date, Date) [PhyloFis]
178 phyloFis = toPhyloFis' (getPhyloFis phylo') k s t metrics filters
179 --------------------------------------
181 phylo' = docsToFis' d p
182 --------------------------------------
184 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
187 -- | To reconstruct the Level 0 of a Phylo
188 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
189 toPhylo0 d p = addPhyloLevel 0 d p
192 class PhyloMaker corpus
194 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
195 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
196 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
198 instance PhyloMaker [(Date, Text)]
200 --------------------------------------
201 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
203 --------------------------------------
205 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
206 --------------------------------------
208 phylo0 = toPhylo0 phyloDocs phyloBase
209 --------------------------------------
210 phyloDocs :: Map (Date, Date) [Document]
211 phyloDocs = corpusToDocs c phyloBase
212 --------------------------------------
214 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
215 --------------------------------------
216 --------------------------------------
217 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
219 --------------------------------------
220 cooc :: Map Date (Map (Int,Int) Double)
221 cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
222 --------------------------------------
223 nbDocs :: Map Date Double
225 --------------------------------------
226 foundations :: PhyloFoundations
227 foundations = PhyloFoundations (initFoundationsRoots roots) termList
228 --------------------------------------
229 periods :: [(Date,Date)]
230 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
231 $ both fst (head' "LevelMaker" c,last c)
232 --------------------------------------
233 --------------------------------------
234 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
237 instance PhyloMaker [Document]
239 --------------------------------------
240 toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
242 --------------------------------------
244 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
245 --------------------------------------
247 phylo0 = toPhylo0 phyloDocs phyloBase
248 --------------------------------------
249 phyloDocs :: Map (Date, Date) [Document]
250 phyloDocs = corpusToDocs c phyloBase
251 --------------------------------------
253 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
254 --------------------------------------
255 --------------------------------------
256 toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
258 --------------------------------------
259 cooc :: Map Date (Map (Int,Int) Double)
260 cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
261 --------------------------------------
262 nbDocs :: Map Date Double
263 nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
264 --------------------------------------
265 foundations :: PhyloFoundations
266 foundations = PhyloFoundations (initFoundationsRoots roots) termList
267 --------------------------------------
268 periods :: [(Date,Date)]
269 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
270 $ both date (head' "LevelMaker" c,last c)
271 --------------------------------------
272 --------------------------------------
273 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
281 tracePhyloBase :: Phylo -> Phylo
282 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
283 <> show (length $ _phylo_periods p) <> " periods from "
284 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
286 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
288 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
291 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
292 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
293 <> "count : " <> show (length pts) <> " pointers\n"
294 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
295 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
296 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
297 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
299 --------------------------------------
301 sim = sort $ map snd pts
302 --------------------------------------
304 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
305 --------------------------------------
308 traceBranches :: Level -> Phylo -> Phylo
309 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
310 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
311 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
312 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
313 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
314 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
315 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
317 --------------------------------------
319 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
320 $ filter (\(id,_) -> (fst id) == lvl)
321 $ getGroupsByBranches p
322 --------------------------------------