2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
19 module Gargantext.Viz.Phylo.LevelMaker
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Text.Context (TermList)
37 import qualified Data.Vector.Storable as VS
38 import qualified Data.Set as Set
39 import qualified Data.Vector as Vector
41 import Debug.Trace (trace)
42 import Numeric.Statistics (percentile)
45 -- | A typeClass for polymorphic PhyloLevel functions
46 class PhyloLevelMaker aggregate
48 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
49 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
50 -- | To create a list of PhyloGroups based on a list of aggregates a
51 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
54 instance PhyloLevelMaker PhyloCluster
56 --------------------------------------
57 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
59 | lvl > 1 = toPhyloLevel lvl m p
60 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
61 --------------------------------------
62 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
63 toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
64 --------------------------------------
67 instance PhyloLevelMaker PhyloFis
69 --------------------------------------
70 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
72 | lvl == 1 = toPhyloLevel lvl m p
73 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
74 --------------------------------------
75 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
76 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
77 --------------------------------------
80 instance PhyloLevelMaker Document
82 --------------------------------------
83 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
85 | lvl == 0 = toPhyloLevel lvl m p
86 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
87 --------------------------------------
88 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
89 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
93 --------------------------------------
96 -- | To transform a Cluster into a Phylogroup
97 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
98 clusterToGroup prd lvl idx lbl groups _m =
99 PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
101 --------------------------------------
103 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
104 --------------------------------------
107 -- | To transform a Clique into a PhyloGroup
108 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
109 cliqueToGroup prd lvl idx lbl fis p =
110 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
112 --------------------------------------
114 ngrams = sort $ map (\x -> getIdxInRoots x p)
117 --------------------------------------
120 -- | To transform a list of Ngrams into a PhyloGroup
121 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
122 ngramsToGroup prd lvl idx lbl ngrams p =
123 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
126 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
127 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
128 toPhyloLevel lvl m p = alterPhyloPeriods
129 (\period -> let pId = _phylo_periodId period
130 in over (phylo_periodLevels)
132 let groups = toPhyloGroups lvl pId (m ! pId) m p
133 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
137 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
138 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
139 toNthLevel lvlMax prox clus p
141 | otherwise = toNthLevel lvlMax prox clus
142 $ traceBranches (lvl + 1)
143 $ setPhyloBranches (lvl + 1)
144 -- $ traceTempoMatching Descendant (lvl + 1)
145 -- $ interTempoMatching Descendant (lvl + 1) prox
146 -- $ traceTempoMatching Ascendant (lvl + 1)
147 -- $ interTempoMatching Ascendant (lvl + 1) prox
148 $ transposePeriodLinks (lvl + 1)
149 $ setLevelLinks (lvl, lvl + 1)
150 $ addPhyloLevel (lvl + 1)
151 (phyloToClusters lvl clus p) p
153 --------------------------------------
156 --------------------------------------
159 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
160 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
161 toPhylo1 clus prox metrics filters d p = case clus of
162 Fis (FisParams k s t) -> traceBranches 1
164 $ traceTempoMatching Descendant 1
165 $ interTempoMatching Descendant 1 prox
166 $ traceTempoMatching Ascendant 1
167 $ interTempoMatching Ascendant 1 prox
168 $ setLevelLinks (0,1)
169 $ setLevelLinks (1,0)
170 $ addPhyloLevel 1 phyloFis p
172 --------------------------------------
173 phyloFis :: Map (Date, Date) [PhyloFis]
174 phyloFis = toPhyloFis d k s t metrics filters
175 --------------------------------------
177 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
180 -- | To reconstruct the Level 0 of a Phylo
181 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
182 toPhylo0 d p = addPhyloLevel 0 d p
185 class PhyloMaker corpus
187 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
188 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
189 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
191 instance PhyloMaker [(Date, Text)]
193 --------------------------------------
194 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
196 --------------------------------------
198 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
199 --------------------------------------
201 phylo0 = toPhylo0 phyloDocs phyloBase
202 --------------------------------------
203 phyloDocs :: Map (Date, Date) [Document]
204 phyloDocs = corpusToDocs c phyloBase
205 --------------------------------------
207 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
208 --------------------------------------
209 --------------------------------------
210 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
212 --------------------------------------
213 foundations :: PhyloFoundations
214 foundations = PhyloFoundations (initFoundationsRoots roots) termList
215 --------------------------------------
216 periods :: [(Date,Date)]
217 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
218 $ both fst (head' "LevelMaker" c,last c)
219 --------------------------------------
220 --------------------------------------
221 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
224 instance PhyloMaker [Document]
226 --------------------------------------
227 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
229 --------------------------------------
231 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
232 --------------------------------------
234 phylo0 = toPhylo0 phyloDocs phyloBase
235 --------------------------------------
236 phyloDocs :: Map (Date, Date) [Document]
237 phyloDocs = corpusToDocs c phyloBase
238 --------------------------------------
240 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
241 --------------------------------------
242 --------------------------------------
243 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
245 --------------------------------------
246 foundations :: PhyloFoundations
247 foundations = PhyloFoundations (initFoundationsRoots roots) termList
248 --------------------------------------
249 periods :: [(Date,Date)]
250 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
251 $ both date (head' "LevelMaker" c,last c)
252 --------------------------------------
253 --------------------------------------
254 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
262 tracePhyloBase :: Phylo -> Phylo
263 tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
264 <> show (length $ _phylo_periods p) <> " periods from "
265 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
267 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
269 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
272 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
273 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
274 <> "count : " <> show (length pts) <> " pointers\n"
275 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
276 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
277 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
278 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
280 --------------------------------------
282 sim = sort $ map snd pts
283 --------------------------------------
285 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
286 --------------------------------------
289 traceBranches :: Level -> Phylo -> Phylo
290 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
291 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
292 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
293 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
294 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
295 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
296 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
298 --------------------------------------
300 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
301 $ filter (\(id,_) -> (fst id) == lvl)
302 $ getGroupsByBranches p
303 --------------------------------------