2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Text.Context (TermList)
37 import qualified Data.Vector.Storable as VS
38 import qualified Data.Set as Set
39 import qualified Data.Vector as Vector
41 import Debug.Trace (trace)
42 import Numeric.Statistics (percentile)
45 -- | A typeClass for polymorphic PhyloLevel functions
46 class PhyloLevelMaker aggregate
48 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
49 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
50 -- | To create a list of PhyloGroups based on a list of aggregates a
51 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
54 instance PhyloLevelMaker PhyloCluster
56 --------------------------------------
57 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
59 | lvl > 1 = toPhyloLevel lvl m p
60 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
61 --------------------------------------
62 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
63 toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
64 --------------------------------------
67 instance PhyloLevelMaker PhyloFis
69 --------------------------------------
70 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
72 | lvl == 1 = toPhyloLevel lvl m p
73 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
74 --------------------------------------
75 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
76 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
77 --------------------------------------
80 instance PhyloLevelMaker Document
82 --------------------------------------
83 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
85 | lvl == 0 = toPhyloLevel lvl m p
86 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
87 --------------------------------------
88 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
89 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
93 --------------------------------------
96 -- | To transform a Cluster into a Phylogroup
97 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
98 clusterToGroup prd lvl idx lbl groups _m =
99 PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
101 --------------------------------------
103 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
104 --------------------------------------
107 -- | To transform a Clique into a PhyloGroup
108 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
109 cliqueToGroup prd lvl idx lbl fis p =
110 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
112 --------------------------------------
114 ngrams = sort $ map (\x -> getIdxInRoots x p)
117 --------------------------------------
120 -- | To transform a list of Ngrams into a PhyloGroup
121 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
122 ngramsToGroup prd lvl idx lbl ngrams p =
123 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
126 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
127 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
128 toPhyloLevel lvl m p = alterPhyloPeriods
129 (\period -> let pId = _phylo_periodId period
130 in over (phylo_periodLevels)
132 let groups = toPhyloGroups lvl pId (m ! pId) m p
133 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
137 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
138 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
139 toNthLevel lvlMax prox clus p
141 | otherwise = toNthLevel lvlMax prox clus
142 $ traceBranches (lvl + 1)
143 $ setPhyloBranches (lvl + 1)
144 $ traceTempoMatching Descendant (lvl + 1)
145 $ interTempoMatching Descendant (lvl + 1) prox
146 $ traceTempoMatching Ascendant (lvl + 1)
147 $ interTempoMatching Ascendant (lvl + 1) prox
148 $ setLevelLinks (lvl, lvl + 1)
149 $ addPhyloLevel (lvl + 1)
150 (phyloToClusters lvl clus p) p
152 --------------------------------------
155 --------------------------------------
158 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
159 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
160 toPhylo1 clus prox metrics filters d p = case clus of
161 Fis (FisParams k s t) -> traceBranches 1
163 $ traceTempoMatching Descendant 1
164 $ interTempoMatching Descendant 1 prox
165 $ traceTempoMatching Ascendant 1
166 $ interTempoMatching Ascendant 1 prox
167 $ setLevelLinks (0,1)
168 $ setLevelLinks (1,0)
169 $ addPhyloLevel 1 phyloFis p
171 --------------------------------------
172 phyloFis :: Map (Date, Date) [PhyloFis]
173 phyloFis = toPhyloFis d k s t metrics filters
174 --------------------------------------
176 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
179 -- | To reconstruct the Level 0 of a Phylo
180 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
181 toPhylo0 d p = addPhyloLevel 0 d p
184 class PhyloMaker corpus
186 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
187 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
188 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
190 instance PhyloMaker [(Date, Text)]
192 --------------------------------------
193 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
195 --------------------------------------
197 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
198 --------------------------------------
200 phylo0 = toPhylo0 phyloDocs phyloBase
201 --------------------------------------
202 phyloDocs :: Map (Date, Date) [Document]
203 phyloDocs = corpusToDocs c phyloBase
204 --------------------------------------
206 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
207 --------------------------------------
208 --------------------------------------
209 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
211 --------------------------------------
212 foundations :: PhyloFoundations
213 foundations = PhyloFoundations (initFoundationsRoots roots) termList
214 --------------------------------------
215 periods :: [(Date,Date)]
216 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
217 $ both fst (head' "LevelMaker" c,last c)
218 --------------------------------------
219 --------------------------------------
220 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
223 instance PhyloMaker [Document]
225 --------------------------------------
226 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
228 --------------------------------------
230 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
231 --------------------------------------
233 phylo0 = toPhylo0 phyloDocs phyloBase
234 --------------------------------------
235 phyloDocs :: Map (Date, Date) [Document]
236 phyloDocs = corpusToDocs c phyloBase
237 --------------------------------------
239 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
240 --------------------------------------
241 --------------------------------------
242 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
244 --------------------------------------
245 foundations :: PhyloFoundations
246 foundations = PhyloFoundations (initFoundationsRoots roots) termList
247 --------------------------------------
248 periods :: [(Date,Date)]
249 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
250 $ both date (head' "LevelMaker" c,last c)
251 --------------------------------------
252 --------------------------------------
253 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
261 tracePhyloBase :: Phylo -> Phylo
262 tracePhyloBase p = trace ( "\n-----------------\n--| PhyloBase |--\n-----------------\n\n"
263 <> show (length $ _phylo_periods p) <> " periods from "
264 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
266 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
268 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
271 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
272 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
273 <> "count : " <> show (length pts) <> " pointers\n"
274 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
275 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
276 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
277 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
279 --------------------------------------
281 sim = sort $ map snd pts
282 --------------------------------------
284 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
285 --------------------------------------
288 traceBranches :: Level -> Phylo -> Phylo
289 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
290 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
291 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
292 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
293 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
294 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
295 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
297 --------------------------------------
299 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
300 $ filter (\(id,_) -> (fst id) == lvl)
301 $ getGroupsByBranches p
302 --------------------------------------