2 Module : Gargantext.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
17 module Gargantext.Viz.Phylo.PhyloTools where
19 import Data.Vector (Vector, elemIndex)
20 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition)
21 import Data.Set (Set, size, disjoint)
22 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
23 import Data.String (String)
24 import Data.Text (Text, unwords)
26 import Gargantext.Prelude
27 import Gargantext.Viz.AdaptativePhylo
31 import Debug.Trace (trace)
32 import Control.Lens hiding (Level)
34 import qualified Data.Vector as Vector
35 import qualified Data.List as List
36 import qualified Data.Set as Set
42 -- | To print an important message as an IO()
43 printIOMsg :: String -> IO ()
48 <> "-- | " <> msg <> "\n" )
51 -- | To print a comment as an IO()
52 printIOComment :: String -> IO ()
54 putStrLn ( "\n" <> cmt <> "\n" )
62 roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
63 roundToStr = printf "%0.*f"
66 countSup :: Double -> [Double] -> Int
67 countSup s l = length $ filter (>s) l
70 elemIndex' :: Eq a => a -> [a] -> Int
71 elemIndex' e l = case (List.elemIndex e l) of
72 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
81 -- | Is this Ngrams a Foundations Root ?
82 isRoots :: Ngrams -> Vector Ngrams -> Bool
83 isRoots n ns = Vector.elem n ns
85 -- | To transform a list of nrams into a list of foundation's index
86 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
87 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
89 -- | To transform a list of Ngrams Indexes into a Label
90 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
91 ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
94 -- | To transform a list of Ngrams Indexes into a list of Text
95 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
96 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
103 -- | To transform a list of periods into a set of Dates
104 periodsToYears :: [(Date,Date)] -> Set Date
105 periodsToYears periods = (Set.fromList . sort . concat)
106 $ map (\(d,d') -> [d..d']) periods
109 findBounds :: [Date] -> (Date,Date)
111 let dates' = sort dates
112 in (head' "findBounds" dates', last' "findBounds" dates')
115 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
116 toPeriods dates p s =
117 let (start,end) = findBounds dates
118 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
119 $ chunkAlong p s [start .. end]
122 -- | Get a regular & ascendante timeScale from a given list of dates
123 toTimeScale :: [Date] -> Int -> [Date]
124 toTimeScale dates step =
125 let (start,end) = findBounds dates
126 in [start, (start + step) .. end]
129 getTimeStep :: TimeUnit -> Int
130 getTimeStep time = case time of
133 getTimePeriod :: TimeUnit -> Int
134 getTimePeriod time = case time of
137 getTimeFrame :: TimeUnit -> Int
138 getTimeFrame time = case time of
146 -- | To find if l' is nested in l
147 isNested :: Eq a => [a] -> [a] -> Bool
150 | length l' > length l = False
151 | (union l l') == l = True
155 -- | To filter Fis with small Support but by keeping non empty Periods
156 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
157 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
158 then keepFilled f (thr - 1) l
162 traceClique :: Map (Date, Date) [PhyloFis] -> String
163 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
165 --------------------------------------
167 cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
168 --------------------------------------
171 traceSupport :: Map (Date, Date) [PhyloFis] -> String
172 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
174 --------------------------------------
176 supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
177 --------------------------------------
180 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
181 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
182 <> "Support : " <> (traceSupport mFis) <> "\n"
183 <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
186 -------------------------
187 -- | Contextual unit | --
188 -------------------------
191 getFisSupport :: ContextualUnit -> Int
192 getFisSupport unit = case unit of
194 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
196 getFisSize :: ContextualUnit -> Int
197 getFisSize unit = case unit of
199 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
206 listToCombi' :: [a] -> [(a,a)]
207 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
209 listToEqual' :: Eq a => [a] -> [(a,a)]
210 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
212 listToKeys :: Eq a => [a] -> [(a,a)]
213 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
215 listToMatrix :: [Int] -> Map (Int,Int) Double
216 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
218 sumCooc :: Cooc -> Cooc -> Cooc
219 sumCooc cooc cooc' = unionWith (+) cooc cooc'
221 getTrace :: Cooc -> Double
222 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
225 -- | To build the local cooc matrix of each phylogroup
226 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
227 ngramsToCooc ngrams coocs =
228 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
229 pairs = listToKeys ngrams
230 in filterWithKey (\k _ -> elem k pairs) cooc
237 getGroupId :: PhyloGroup -> PhyloGroupId
238 getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
240 groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
241 groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
243 getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
244 getPeriodPointers fil group =
246 ToChilds -> group ^. phylo_groupPeriodChilds
247 ToParents -> group ^. phylo_groupPeriodParents
249 filterProximity :: Proximity -> Double -> Double -> Bool
250 filterProximity proximity thr local =
252 WeightedLogJaccard _ _ _ -> local >= thr
255 filterPointers :: Filiation -> PointerType -> Proximity -> Double -> PhyloGroup -> PhyloGroup
256 filterPointers fil pty proximity thr group =
258 TemporalPointer -> case fil of
259 ToChilds -> group & phylo_groupPeriodChilds %~ (filter (\(_,w) -> filterProximity proximity thr w))
260 ToParents -> group & phylo_groupPeriodParents %~ (filter (\(_,w) -> filterProximity proximity thr w))
261 LevelPointer -> undefined
268 addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
269 addPointers group fil pty pointers =
271 TemporalPointer -> case fil of
272 ToChilds -> group & phylo_groupPeriodChilds .~ pointers
273 ToParents -> group & phylo_groupPeriodParents .~ pointers
274 LevelPointer -> case fil of
275 ToChilds -> group & phylo_groupLevelChilds .~ pointers
276 ToParents -> group & phylo_groupLevelParents .~ pointers
279 getPeriodIds :: Phylo -> [(Date,Date)]
280 getPeriodIds phylo = sortOn fst
282 $ phylo ^. phylo_periods
284 getLevelParentId :: PhyloGroup -> PhyloGroupId
285 getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents
287 getLastLevel :: Phylo -> Level
288 getLastLevel phylo = last' "lastLevel" $ getLevels phylo
290 getLevels :: Phylo -> [Level]
291 getLevels phylo = nub
293 $ keys $ view ( phylo_periods
295 . phylo_periodLevels ) phylo
298 getConfig :: Phylo -> Config
299 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
302 getRoots :: Phylo -> Vector Ngrams
303 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
305 phyloToLastBranches :: Phylo -> [[PhyloGroup]]
306 phyloToLastBranches phylo = elems
308 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
309 $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
311 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
312 getGroupsFromLevel lvl phylo =
313 elems $ view ( phylo_periods
317 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
318 . phylo_levelGroups ) phylo
321 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
322 updatePhyloGroups lvl m phylo =
327 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
331 let id = getGroupId group
338 traceToPhylo :: Level -> Phylo -> Phylo
339 traceToPhylo lvl phylo =
340 trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
341 <> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
342 <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
348 relatedComponents :: Ord a => [[a]] -> [[a]]
349 relatedComponents graph = foldl' (\acc groups ->
353 let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
354 in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
357 traceSynchronyEnd :: Phylo -> Phylo
358 traceSynchronyEnd phylo =
359 trace ( "\n" <> "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
360 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
361 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
364 traceSynchronyStart :: Phylo -> Phylo
365 traceSynchronyStart phylo =
366 trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
367 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
368 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
376 getSensibility :: Proximity -> Double
377 getSensibility proxi = case proxi of
378 WeightedLogJaccard s _ _ -> s
381 getThresholdInit :: Proximity -> Double
382 getThresholdInit proxi = case proxi of
383 WeightedLogJaccard _ t _ -> t
386 getThresholdStep :: Proximity -> Double
387 getThresholdStep proxi = case proxi of
388 WeightedLogJaccard _ _ s -> s
392 traceBranchMatching :: Proximity -> Double -> [PhyloGroup] -> [PhyloGroup]
393 traceBranchMatching proxi thr groups = case proxi of
394 WeightedLogJaccard _ i s -> trace (
395 roundToStr 2 thr <> " "
396 <> foldl (\acc _ -> acc <> ".") "." [(10*i),(10*i + 10*s)..(10*thr)]
397 <> " " <> show(length groups) <> " groups"
405 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
406 intersectInit acc lst lst' =
407 if (null lst) || (null lst')
409 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
410 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
413 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
414 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
416 ngramsInBranches :: [[PhyloGroup]] -> [Int]
417 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
420 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
421 traceMatchSuccess thr qua qua' nextBranches =
422 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
423 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
424 <> ",(1.." <> show (length nextBranches) <> ")]"
425 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
426 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
427 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
430 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
431 traceMatchFailure thr qua qua' branches =
432 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
433 <> ",(1.." <> show (length branches) <> ")]"
434 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
435 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
439 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
440 traceMatchNoSplit branches =
441 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
442 <> ",(1.." <> show (length branches) <> ")]"
443 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
444 <> " - unable to split in smaller branches" <> "\n"
448 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
449 traceMatchLimit branches =
450 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
451 <> ",(1.." <> show (length branches) <> ")]"
452 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
453 <> " - unable to increase the threshold above 1" <> "\n"
457 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
458 traceMatchEnd groups =
459 trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
460 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups
463 traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
464 traceTemporalMatching groups =
465 trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups