2 Module : Gargantext.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
17 module Gargantext.Viz.Phylo.PhyloTools where
19 import Data.Vector (Vector, elemIndex)
20 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, intersect, (\\))
21 import Data.Set (Set, size)
22 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
23 import Data.String (String)
24 import Data.Text (Text, unwords)
26 import Gargantext.Prelude
27 import Gargantext.Viz.AdaptativePhylo
29 import Debug.Trace (trace)
30 import Control.Lens hiding (Level)
32 import qualified Data.Vector as Vector
33 import qualified Data.List as List
34 import qualified Data.Set as Set
41 countSup :: Double -> [Double] -> Int
42 countSup s l = length $ filter (>s) l
45 elemIndex' :: Eq a => a -> [a] -> Int
46 elemIndex' e l = case (List.elemIndex e l) of
47 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
56 -- | Is this Ngrams a Foundations Root ?
57 isRoots :: Ngrams -> Vector Ngrams -> Bool
58 isRoots n ns = Vector.elem n ns
60 -- | To transform a list of nrams into a list of foundation's index
61 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
62 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
64 -- | To transform a list of Ngrams Indexes into a Label
65 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
66 ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
69 -- | To transform a list of Ngrams Indexes into a list of Text
70 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
71 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
78 -- | To transform a list of periods into a set of Dates
79 periodsToYears :: [(Date,Date)] -> Set Date
80 periodsToYears periods = (Set.fromList . sort . concat)
81 $ map (\(d,d') -> [d..d']) periods
84 findBounds :: [Date] -> (Date,Date)
86 let dates' = sort dates
87 in (head' "findBounds" dates', last' "findBounds" dates')
90 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
92 let (start,end) = findBounds dates
93 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
94 $ chunkAlong p s [start .. end]
97 -- | Get a regular & ascendante timeScale from a given list of dates
98 toTimeScale :: [Date] -> Int -> [Date]
99 toTimeScale dates step =
100 let (start,end) = findBounds dates
101 in [start, (start + step) .. end]
104 getTimeStep :: TimeUnit -> Int
105 getTimeStep time = case time of
108 getTimePeriod :: TimeUnit -> Int
109 getTimePeriod time = case time of
112 getTimeFrame :: TimeUnit -> Int
113 getTimeFrame time = case time of
121 -- | To find if l' is nested in l
122 isNested :: Eq a => [a] -> [a] -> Bool
125 | length l' > length l = False
126 | (union l l') == l = True
130 -- | To filter Fis with small Support but by keeping non empty Periods
131 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
132 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
133 then keepFilled f (thr - 1) l
137 traceClique :: Map (Date, Date) [PhyloFis] -> String
138 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
140 --------------------------------------
142 cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
143 --------------------------------------
146 traceSupport :: Map (Date, Date) [PhyloFis] -> String
147 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
149 --------------------------------------
151 supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
152 --------------------------------------
155 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
156 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
157 <> "Support : " <> (traceSupport mFis) <> "\n"
158 <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
161 -------------------------
162 -- | Contextual unit | --
163 -------------------------
166 getFisSupport :: ContextualUnit -> Int
167 getFisSupport unit = case unit of
169 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
171 getFisSize :: ContextualUnit -> Int
172 getFisSize unit = case unit of
174 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
181 listToCombi' :: [a] -> [(a,a)]
182 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
184 listToEqual' :: Eq a => [a] -> [(a,a)]
185 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
187 listToKeys :: Eq a => [a] -> [(a,a)]
188 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
190 listToMatrix :: [Int] -> Map (Int,Int) Double
191 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
193 sumCooc :: Cooc -> Cooc -> Cooc
194 sumCooc cooc cooc' = unionWith (+) cooc cooc'
196 getTrace :: Cooc -> Double
197 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
200 -- | To build the local cooc matrix of each phylogroup
201 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
202 ngramsToCooc ngrams coocs =
203 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
204 pairs = listToKeys ngrams
205 in filterWithKey (\k _ -> elem k pairs) cooc
212 getGroupId :: PhyloGroup -> PhyloGroupId
213 getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
219 addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
220 addPointers group fil pty pointers =
222 TemporalPointer -> case fil of
223 ToChilds -> group & phylo_groupPeriodChilds .~ pointers
224 ToParents -> group & phylo_groupPeriodParents .~ pointers
225 LevelPointer -> case fil of
226 ToChilds -> group & phylo_groupLevelChilds .~ pointers
227 ToParents -> group & phylo_groupLevelParents .~ pointers
230 getPeriodIds :: Phylo -> [(Date,Date)]
231 getPeriodIds phylo = sortOn fst
233 $ phylo ^. phylo_periods
235 getLastLevel :: Phylo -> Level
236 getLastLevel phylo = last' "lastLevel" $ getLevels phylo
238 getLevels :: Phylo -> [Level]
239 getLevels phylo = nub
241 $ keys $ view ( phylo_periods
243 . phylo_periodLevels ) phylo
246 getConfig :: Phylo -> Config
247 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
250 getRoots :: Phylo -> Vector Ngrams
251 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
253 phyloToLastBranches :: Phylo -> [[PhyloGroup]]
254 phyloToLastBranches phylo = elems
256 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
257 $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
259 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
260 getGroupsFromLevel lvl phylo =
261 elems $ view ( phylo_periods
265 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
266 . phylo_levelGroups ) phylo
269 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
270 updatePhyloGroups lvl m phylo =
275 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
279 let id = getGroupId group
290 relatedComponents :: Eq a => [[a]] -> [[a]]
291 relatedComponents graphs = foldl' (\mem groups ->
295 let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
298 else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
305 getSensibility :: Proximity -> Double
306 getSensibility proxi = case proxi of
307 WeightedLogJaccard s _ _ -> s
310 getThresholdInit :: Proximity -> Double
311 getThresholdInit proxi = case proxi of
312 WeightedLogJaccard _ t _ -> t
315 getThresholdStep :: Proximity -> Double
316 getThresholdStep proxi = case proxi of
317 WeightedLogJaccard _ _ s -> s
325 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
326 intersectInit acc lst lst' =
327 if (null lst) || (null lst')
329 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
330 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
333 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
334 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
336 ngramsInBranches :: [[PhyloGroup]] -> [Int]
337 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
340 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
341 traceMatchSuccess thr qua qua' nextBranches =
342 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
343 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
344 <> ",(1.." <> show (length nextBranches) <> ")]"
345 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
346 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
347 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
350 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
351 traceMatchFailure thr qua qua' branches =
352 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
353 <> ",(1.." <> show (length branches) <> ")]"
354 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
355 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
359 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
360 traceMatchNoSplit branches =
361 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
362 <> ",(1.." <> show (length branches) <> ")]"
363 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
364 <> " - unable to split in smaller branches" <> "\n"
368 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
369 traceMatchLimit branches =
370 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
371 <> ",(1.." <> show (length branches) <> ")]"
372 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
373 <> " - unable to increase the threshold above 1" <> "\n"
377 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
378 traceMatchEnd groups =
379 trace ("\n" <> "-- | End of temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
380 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups