]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/LevelMaker.hs
fix the coocurencies
[gargantext.git] / src / Gargantext / Viz / Phylo / LevelMaker.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE TypeSynonymInstances #-}
17 {-# LANGUAGE FlexibleInstances #-}
18
19 module Gargantext.Viz.Phylo.LevelMaker
20 where
21
22 import Control.Lens hiding (both, Level)
23 import Data.List ((++), sort, concat, nub, zip, last)
24 import Data.Map (Map, (!), empty, singleton)
25 import Data.Text (Text)
26 import Data.Tuple.Extra
27 import Gargantext.Prelude
28 import Gargantext.Viz.Phylo
29 import Gargantext.Viz.Phylo.Aggregates.Cluster
30 import Gargantext.Viz.Phylo.Aggregates.Document
31 import Gargantext.Viz.Phylo.Aggregates.Fis
32 import Gargantext.Viz.Phylo.BranchMaker
33 import Gargantext.Viz.Phylo.LinkMaker
34 import Gargantext.Viz.Phylo.Tools
35 import Gargantext.Text.Context (TermList)
36
37 import qualified Data.Vector.Storable as VS
38 import qualified Data.Set as Set
39 import qualified Data.Vector as Vector
40
41 import Debug.Trace (trace)
42 import Numeric.Statistics (percentile)
43
44
45 -- | A typeClass for polymorphic PhyloLevel functions
46 class PhyloLevelMaker aggregate
47 where
48 -- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
49 addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
50 -- | To create a list of PhyloGroups based on a list of aggregates a
51 toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
52
53
54 instance PhyloLevelMaker PhyloCluster
55 where
56 --------------------------------------
57 -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
58 addPhyloLevel lvl m p
59 | lvl > 1 = toPhyloLevel lvl m p
60 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
61 --------------------------------------
62 -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
63 toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
64 --------------------------------------
65
66
67 instance PhyloLevelMaker PhyloFis
68 where
69 --------------------------------------
70 -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
71 addPhyloLevel lvl m p
72 | lvl == 1 = toPhyloLevel lvl m p
73 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
74 --------------------------------------
75 -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
76 toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
77 --------------------------------------
78
79
80 instance PhyloLevelMaker Document
81 where
82 --------------------------------------
83 -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
84 addPhyloLevel lvl m p
85 | lvl == 0 = toPhyloLevel lvl m p
86 | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
87 --------------------------------------
88 -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
89 toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
90 $ zip [1..]
91 $ (nub . concat)
92 $ map text l
93 --------------------------------------
94
95
96 -- | To transform a Cluster into a Phylogroup
97 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
98 clusterToGroup prd lvl idx lbl groups _m =
99 PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
100 where
101 --------------------------------------
102 ngrams :: [Int]
103 ngrams = (sort . nub . concat) $ map getGroupNgrams groups
104 --------------------------------------
105
106
107 -- | To transform a Clique into a PhyloGroup
108 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
109 cliqueToGroup prd lvl idx lbl fis p =
110 PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
111 where
112 --------------------------------------
113 ngrams :: [Int]
114 ngrams = sort $ map (\x -> getIdxInRoots x p)
115 $ Set.toList
116 $ getClique fis
117 --------------------------------------
118
119
120 -- | To transform a list of Ngrams into a PhyloGroup
121 ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
122 ngramsToGroup prd lvl idx lbl ngrams p =
123 PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
124
125
126 -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
127 toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
128 toPhyloLevel lvl m p = alterPhyloPeriods
129 (\period -> let pId = _phylo_periodId period
130 in over (phylo_periodLevels)
131 (\phyloLevels ->
132 let groups = toPhyloGroups lvl pId (m ! pId) m p
133 in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
134 ) period) p
135
136
137 -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
138 toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
139 toNthLevel lvlMax prox clus p
140 | lvl >= lvlMax = p
141 | otherwise = toNthLevel lvlMax prox clus
142 $ traceBranches (lvl + 1)
143 $ setPhyloBranches (lvl + 1)
144 $ traceTempoMatching Descendant (lvl + 1)
145 $ interTempoMatching Descendant (lvl + 1) prox
146 $ traceTempoMatching Ascendant (lvl + 1)
147 $ interTempoMatching Ascendant (lvl + 1) prox
148 $ setLevelLinks (lvl, lvl + 1)
149 $ addPhyloLevel (lvl + 1)
150 (phyloToClusters lvl clus p) p
151 where
152 --------------------------------------
153 lvl :: Level
154 lvl = getLastLevel p
155 --------------------------------------
156
157
158 -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
159 toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
160 toPhylo1 clus prox metrics filters d p = case clus of
161 Fis (FisParams k s t) -> traceBranches 1
162 $ setPhyloBranches 1
163 $ traceTempoMatching Descendant 1
164 $ interTempoMatching Descendant 1 prox
165 $ traceTempoMatching Ascendant 1
166 $ interTempoMatching Ascendant 1 prox
167 $ setLevelLinks (0,1)
168 $ setLevelLinks (1,0)
169 $ addPhyloLevel 1 phyloFis p
170 where
171 --------------------------------------
172 phyloFis :: Map (Date, Date) [PhyloFis]
173 phyloFis = toPhyloFis d k s t metrics filters
174 --------------------------------------
175
176 _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
177
178
179 -- | To reconstruct the Level 0 of a Phylo
180 toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
181 toPhylo0 d p = addPhyloLevel 0 d p
182
183
184 class PhyloMaker corpus
185 where
186 toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
187 toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
188 corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
189
190 instance PhyloMaker [(Date, Text)]
191 where
192 --------------------------------------
193 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
194 where
195 --------------------------------------
196 phylo1 :: Phylo
197 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
198 --------------------------------------
199 phylo0 :: Phylo
200 phylo0 = toPhylo0 phyloDocs phyloBase
201 --------------------------------------
202 phyloDocs :: Map (Date, Date) [Document]
203 phyloDocs = corpusToDocs c phyloBase
204 --------------------------------------
205 phyloBase :: Phylo
206 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
207 --------------------------------------
208 --------------------------------------
209 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
210 where
211 --------------------------------------
212 foundations :: PhyloFoundations
213 foundations = PhyloFoundations (initFoundationsRoots roots) termList
214 --------------------------------------
215 periods :: [(Date,Date)]
216 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
217 $ both fst (head' "LevelMaker" c,last c)
218 --------------------------------------
219 --------------------------------------
220 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c
221
222
223 instance PhyloMaker [Document]
224 where
225 --------------------------------------
226 toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
227 where
228 --------------------------------------
229 phylo1 :: Phylo
230 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
231 --------------------------------------
232 phylo0 :: Phylo
233 phylo0 = toPhylo0 phyloDocs phyloBase
234 --------------------------------------
235 phyloDocs :: Map (Date, Date) [Document]
236 phyloDocs = corpusToDocs c phyloBase
237 --------------------------------------
238 phyloBase :: Phylo
239 phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
240 --------------------------------------
241 --------------------------------------
242 toPhyloBase q p c roots termList = initPhyloBase periods foundations p
243 where
244 --------------------------------------
245 foundations :: PhyloFoundations
246 foundations = PhyloFoundations (initFoundationsRoots roots) termList
247 --------------------------------------
248 periods :: [(Date,Date)]
249 periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
250 $ both date (head' "LevelMaker" c,last c)
251 --------------------------------------
252 --------------------------------------
253 corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
254
255
256 -----------------
257 -- | Tracers | --
258 -----------------
259
260
261 tracePhyloBase :: Phylo -> Phylo
262 tracePhyloBase p = trace ( "\n-----------------\n--| PhyloBase |--\n-----------------\n\n"
263 <> show (length $ _phylo_periods p) <> " periods from "
264 <> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
265 <> " to "
266 <> show (getPhyloPeriodId $ last $ _phylo_periods p)
267 <> "\n"
268 <> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
269
270
271 traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
272 traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
273 <> "count : " <> show (length pts) <> " pointers\n"
274 <> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
275 <> show (percentile 50 (VS.fromList sim)) <> " (50%) "
276 <> show (percentile 75 (VS.fromList sim)) <> " (75%) "
277 <> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
278 where
279 --------------------------------------
280 sim :: [Double]
281 sim = sort $ map snd pts
282 --------------------------------------
283 pts :: [Pointer]
284 pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
285 --------------------------------------
286
287
288 traceBranches :: Level -> Phylo -> Phylo
289 traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
290 <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
291 <> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
292 <> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
293 <> show (percentile 50 (VS.fromList brs)) <> " (50%) "
294 <> show (percentile 75 (VS.fromList brs)) <> " (75%) "
295 <> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
296 where
297 --------------------------------------
298 brs :: [Double]
299 brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
300 $ filter (\(id,_) -> (fst id) == lvl)
301 $ getGroupsByBranches p
302 --------------------------------------