]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloTools.hs
working on synchony
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
16
17 module Gargantext.Viz.Phylo.PhyloTools where
18
19 import Data.Vector (Vector, elemIndex)
20 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, intersect, (\\))
21 import Data.Set (Set, size)
22 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
23 import Data.String (String)
24 import Data.Text (Text, unwords)
25
26 import Gargantext.Prelude
27 import Gargantext.Viz.AdaptativePhylo
28
29 import Debug.Trace (trace)
30 import Control.Lens hiding (Level)
31
32 import qualified Data.Vector as Vector
33 import qualified Data.List as List
34 import qualified Data.Set as Set
35
36 --------------
37 -- | Misc | --
38 --------------
39
40
41 countSup :: Double -> [Double] -> Int
42 countSup s l = length $ filter (>s) l
43
44
45 elemIndex' :: Eq a => a -> [a] -> Int
46 elemIndex' e l = case (List.elemIndex e l) of
47 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
48 Just i -> i
49
50
51 ---------------------
52 -- | Foundations | --
53 ---------------------
54
55
56 -- | Is this Ngrams a Foundations Root ?
57 isRoots :: Ngrams -> Vector Ngrams -> Bool
58 isRoots n ns = Vector.elem n ns
59
60 -- | To transform a list of nrams into a list of foundation's index
61 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
62 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
63
64 -- | To transform a list of Ngrams Indexes into a Label
65 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
66 ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
67
68
69 -- | To transform a list of Ngrams Indexes into a list of Text
70 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
71 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
72
73
74 --------------
75 -- | Time | --
76 --------------
77
78 -- | To transform a list of periods into a set of Dates
79 periodsToYears :: [(Date,Date)] -> Set Date
80 periodsToYears periods = (Set.fromList . sort . concat)
81 $ map (\(d,d') -> [d..d']) periods
82
83
84 findBounds :: [Date] -> (Date,Date)
85 findBounds dates =
86 let dates' = sort dates
87 in (head' "findBounds" dates', last' "findBounds" dates')
88
89
90 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
91 toPeriods dates p s =
92 let (start,end) = findBounds dates
93 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
94 $ chunkAlong p s [start .. end]
95
96
97 -- | Get a regular & ascendante timeScale from a given list of dates
98 toTimeScale :: [Date] -> Int -> [Date]
99 toTimeScale dates step =
100 let (start,end) = findBounds dates
101 in [start, (start + step) .. end]
102
103
104 getTimeStep :: TimeUnit -> Int
105 getTimeStep time = case time of
106 Year _ s _ -> s
107
108 getTimePeriod :: TimeUnit -> Int
109 getTimePeriod time = case time of
110 Year p _ _ -> p
111
112 getTimeFrame :: TimeUnit -> Int
113 getTimeFrame time = case time of
114 Year _ _ f -> f
115
116 -------------
117 -- | Fis | --
118 -------------
119
120
121 -- | To find if l' is nested in l
122 isNested :: Eq a => [a] -> [a] -> Bool
123 isNested l l'
124 | null l' = True
125 | length l' > length l = False
126 | (union l l') == l = True
127 | otherwise = False
128
129
130 -- | To filter Fis with small Support but by keeping non empty Periods
131 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
132 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
133 then keepFilled f (thr - 1) l
134 else f thr l
135
136
137 traceClique :: Map (Date, Date) [PhyloFis] -> String
138 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
139 where
140 --------------------------------------
141 cliques :: [Double]
142 cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
143 --------------------------------------
144
145
146 traceSupport :: Map (Date, Date) [PhyloFis] -> String
147 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
148 where
149 --------------------------------------
150 supports :: [Double]
151 supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
152 --------------------------------------
153
154
155 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
156 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
157 <> "Support : " <> (traceSupport mFis) <> "\n"
158 <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
159
160
161 -------------------------
162 -- | Contextual unit | --
163 -------------------------
164
165
166 getFisSupport :: ContextualUnit -> Int
167 getFisSupport unit = case unit of
168 Fis s _ -> s
169 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
170
171 getFisSize :: ContextualUnit -> Int
172 getFisSize unit = case unit of
173 Fis _ s -> s
174 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
175
176
177 --------------
178 -- | Cooc | --
179 --------------
180
181 listToCombi' :: [a] -> [(a,a)]
182 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
183
184 listToEqual' :: Eq a => [a] -> [(a,a)]
185 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
186
187 listToKeys :: Eq a => [a] -> [(a,a)]
188 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
189
190 listToMatrix :: [Int] -> Map (Int,Int) Double
191 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
192
193 sumCooc :: Cooc -> Cooc -> Cooc
194 sumCooc cooc cooc' = unionWith (+) cooc cooc'
195
196 getTrace :: Cooc -> Double
197 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
198
199
200 -- | To build the local cooc matrix of each phylogroup
201 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
202 ngramsToCooc ngrams coocs =
203 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
204 pairs = listToKeys ngrams
205 in filterWithKey (\k _ -> elem k pairs) cooc
206
207
208 --------------------
209 -- | PhyloGroup | --
210 --------------------
211
212 getGroupId :: PhyloGroup -> PhyloGroupId
213 getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
214
215 ---------------
216 -- | Phylo | --
217 ---------------
218
219 addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
220 addPointers group fil pty pointers =
221 case pty of
222 TemporalPointer -> case fil of
223 ToChilds -> group & phylo_groupPeriodChilds .~ pointers
224 ToParents -> group & phylo_groupPeriodParents .~ pointers
225 LevelPointer -> case fil of
226 ToChilds -> group & phylo_groupLevelChilds .~ pointers
227 ToParents -> group & phylo_groupLevelParents .~ pointers
228
229
230 getPeriodIds :: Phylo -> [(Date,Date)]
231 getPeriodIds phylo = sortOn fst
232 $ keys
233 $ phylo ^. phylo_periods
234
235 getLastLevel :: Phylo -> Level
236 getLastLevel phylo = last' "lastLevel" $ getLevels phylo
237
238 getLevels :: Phylo -> [Level]
239 getLevels phylo = nub
240 $ map snd
241 $ keys $ view ( phylo_periods
242 . traverse
243 . phylo_periodLevels ) phylo
244
245
246 getConfig :: Phylo -> Config
247 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
248
249
250 getRoots :: Phylo -> Vector Ngrams
251 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
252
253 phyloToLastBranches :: Phylo -> [[PhyloGroup]]
254 phyloToLastBranches phylo = elems
255 $ fromListWith (++)
256 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
257 $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
258
259 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
260 getGroupsFromLevel lvl phylo =
261 elems $ view ( phylo_periods
262 . traverse
263 . phylo_periodLevels
264 . traverse
265 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
266 . phylo_levelGroups ) phylo
267
268
269 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
270 updatePhyloGroups lvl m phylo =
271 over ( phylo_periods
272 . traverse
273 . phylo_periodLevels
274 . traverse
275 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
276 . phylo_levelGroups
277 . traverse
278 ) (\group ->
279 let id = getGroupId group
280 in
281 if member id m
282 then m ! id
283 else group ) phylo
284
285 --------------------
286 -- | Clustering | --
287 --------------------
288
289
290 relatedComponents :: Eq a => [[a]] -> [[a]]
291 relatedComponents graphs = foldl' (\mem groups ->
292 if (null mem)
293 then mem ++ [groups]
294 else
295 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
296 in if (null related)
297 then mem ++ [groups]
298 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
299
300
301 -------------------
302 -- | Proximity | --
303 -------------------
304
305 getSensibility :: Proximity -> Double
306 getSensibility proxi = case proxi of
307 WeightedLogJaccard s _ _ -> s
308 Hamming -> undefined
309
310 getThresholdInit :: Proximity -> Double
311 getThresholdInit proxi = case proxi of
312 WeightedLogJaccard _ t _ -> t
313 Hamming -> undefined
314
315 getThresholdStep :: Proximity -> Double
316 getThresholdStep proxi = case proxi of
317 WeightedLogJaccard _ _ s -> s
318 Hamming -> undefined
319
320
321 ----------------
322 -- | Branch | --
323 ----------------
324
325 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
326 intersectInit acc lst lst' =
327 if (null lst) || (null lst')
328 then acc
329 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
330 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
331 else acc
332
333 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
334 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
335
336 ngramsInBranches :: [[PhyloGroup]] -> [Int]
337 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
338
339
340 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
341 traceMatchSuccess thr qua qua' nextBranches =
342 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
343 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
344 <> ",(1.." <> show (length nextBranches) <> ")]"
345 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
346 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
347 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
348
349
350 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
351 traceMatchFailure thr qua qua' branches =
352 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
353 <> ",(1.." <> show (length branches) <> ")]"
354 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
355 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
356 ) branches
357
358
359 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
360 traceMatchNoSplit branches =
361 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
362 <> ",(1.." <> show (length branches) <> ")]"
363 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
364 <> " - unable to split in smaller branches" <> "\n"
365 ) branches
366
367
368 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
369 traceMatchLimit branches =
370 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
371 <> ",(1.." <> show (length branches) <> ")]"
372 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
373 <> " - unable to increase the threshold above 1" <> "\n"
374 ) branches
375
376
377 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
378 traceMatchEnd groups =
379 trace ("\n" <> "-- | End of temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
380 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups