2 Module : Gargantext.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
17 module Gargantext.Viz.Phylo.PhyloTools where
19 import Data.Vector (Vector, elemIndex)
20 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init)
21 import Data.Set (Set, size)
22 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey)
23 import Data.String (String)
24 import Data.Text (Text, unwords)
26 import Gargantext.Prelude
27 import Gargantext.Viz.AdaptativePhylo
29 import Debug.Trace (trace)
30 import Control.Lens hiding (Level)
32 import qualified Data.Vector as Vector
33 import qualified Data.List as List
34 import qualified Data.Set as Set
41 countSup :: Double -> [Double] -> Int
42 countSup s l = length $ filter (>s) l
45 elemIndex' :: Eq a => a -> [a] -> Int
46 elemIndex' e l = case (List.elemIndex e l) of
47 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
56 -- | Is this Ngrams a Foundations Root ?
57 isRoots :: Ngrams -> Vector Ngrams -> Bool
58 isRoots n ns = Vector.elem n ns
60 -- | To transform a list of nrams into a list of foundation's index
61 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
62 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
64 -- | To transform a list of Ngrams Indexes into a Label
65 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
66 ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
69 -- | To transform a list of Ngrams Indexes into a list of Text
70 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
71 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
78 -- | To transform a list of periods into a set of Dates
79 periodsToYears :: [(Date,Date)] -> Set Date
80 periodsToYears periods = (Set.fromList . sort . concat)
81 $ map (\(d,d') -> [d..d']) periods
84 findBounds :: [Date] -> (Date,Date)
86 let dates' = sort dates
87 in (head' "findBounds" dates', last' "findBounds" dates')
90 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
92 let (start,end) = findBounds dates
93 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
94 $ chunkAlong p s [start .. end]
97 -- | Get a regular & ascendante timeScale from a given list of dates
98 toTimeScale :: [Date] -> Int -> [Date]
99 toTimeScale dates step =
100 let (start,end) = findBounds dates
101 in [start, (start + step) .. end]
104 getTimeStep :: TimeUnit -> Int
105 getTimeStep time = case time of
108 getTimePeriod :: TimeUnit -> Int
109 getTimePeriod time = case time of
112 getTimeFrame :: TimeUnit -> Int
113 getTimeFrame time = case time of
121 -- | To find if l' is nested in l
122 isNested :: Eq a => [a] -> [a] -> Bool
125 | length l' > length l = False
126 | (union l l') == l = True
130 -- | To filter Fis with small Support but by keeping non empty Periods
131 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
132 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
133 then keepFilled f (thr - 1) l
137 traceClique :: Map (Date, Date) [PhyloFis] -> String
138 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
140 --------------------------------------
142 cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
143 --------------------------------------
146 traceSupport :: Map (Date, Date) [PhyloFis] -> String
147 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
149 --------------------------------------
151 supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
152 --------------------------------------
155 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
156 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
157 <> "Support : " <> (traceSupport mFis) <> "\n"
158 <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
161 -------------------------
162 -- | Contextual unit | --
163 -------------------------
166 getFisSupport :: ContextualUnit -> Int
167 getFisSupport unit = case unit of
169 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
171 getFisSize :: ContextualUnit -> Int
172 getFisSize unit = case unit of
174 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
182 listToCombi' :: [a] -> [(a,a)]
183 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
185 listToEqual' :: Eq a => [a] -> [(a,a)]
186 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
188 listToKeys :: Eq a => [a] -> [(a,a)]
189 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
191 listToMatrix :: [Int] -> Map (Int,Int) Double
192 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
194 sumCooc :: Cooc -> Cooc -> Cooc
195 sumCooc cooc cooc' = unionWith (+) cooc cooc'
197 getTrace :: Cooc -> Double
198 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
204 getGroupId :: PhyloGroup -> PhyloGroupId
205 getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
211 addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
212 addPointers group fil pty pointers =
214 TemporalPointer -> case fil of
215 ToChilds -> group & phylo_groupPeriodChilds .~ pointers
216 ToParents -> group & phylo_groupPeriodParents .~ pointers
217 LevelPointer -> case fil of
218 ToChilds -> group & phylo_groupLevelChilds .~ pointers
219 ToParents -> group & phylo_groupLevelParents .~ pointers
222 getPeriodIds :: Phylo -> [(Date,Date)]
223 getPeriodIds phylo = sortOn fst
225 $ phylo ^. phylo_periods
228 getConfig :: Phylo -> Config
229 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
232 getRoots :: Phylo -> Vector Ngrams
233 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
236 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
237 getGroupsFromLevel lvl phylo =
238 elems $ view ( phylo_periods
242 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
243 . phylo_levelGroups ) phylo
246 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
247 updatePhyloGroups lvl m phylo =
252 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
256 let id = getGroupId group
267 getSensibility :: Proximity -> Double
268 getSensibility proxi = case proxi of
269 WeightedLogJaccard s _ _ -> s
272 getThresholdInit :: Proximity -> Double
273 getThresholdInit proxi = case proxi of
274 WeightedLogJaccard _ t _ -> t
277 getThresholdStep :: Proximity -> Double
278 getThresholdStep proxi = case proxi of
279 WeightedLogJaccard _ _ s -> s
287 ngramsInBranches :: [[PhyloGroup]] -> [Int]
288 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
291 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
292 traceMatchSuccess thr qua qua' nextBranches =
293 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
294 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
295 <> ",(1.." <> show (length nextBranches) <> ")]"
296 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
297 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
298 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
301 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
302 traceMatchFailure thr qua qua' branches =
303 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
304 <> ",(1.." <> show (length branches) <> ")]"
305 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
306 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
310 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
311 traceMatchNoSplit branches =
312 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
313 <> ",(1.." <> show (length branches) <> ")]"
314 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
315 <> " - unable to split in smaller branches" <> "\n"
319 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
320 traceMatchLimit branches =
321 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
322 <> ",(1.." <> show (length branches) <> ")]"
323 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
324 <> " - unable to increase the threshold above 1" <> "\n"
328 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
329 traceMatchEnd groups =
330 trace ("\n" <> "-- | End of temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
331 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups