2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.Tools
20 import Control.Lens hiding (both, Level)
21 import Data.List (filter, intersect, (++), sort, null, head, tail, last, tails, delete, nub, concat, union, sortOn)
22 import Data.Maybe (mapMaybe)
23 import Data.Map (Map, mapKeys, member, elems, adjust, (!))
25 import Data.Text (Text, toLower)
26 import Data.Tuple.Extra
27 import Data.Vector (Vector,elemIndex)
28 import Gargantext.Prelude hiding (head)
29 import Gargantext.Viz.Phylo
31 import qualified Data.List as List
32 import qualified Data.Map as Map
33 import qualified Data.Set as Set
34 import qualified Data.Vector as Vector
37 ------------------------------------------------------------------------
41 -- | To alter a PhyloGroup matching a given Level
42 alterGroupWithLevel :: (PhyloGroup -> PhyloGroup) -> Level -> Phylo -> Phylo
43 alterGroupWithLevel f lvl p = over ( phylo_periods
49 ) (\g -> if getGroupLevel g == lvl
54 -- | To alter each list of PhyloGroups following a given function
55 alterPhyloGroups :: ([PhyloGroup] -> [PhyloGroup]) -> Phylo -> Phylo
56 alterPhyloGroups f p = over ( phylo_periods
64 -- | To alter each PhyloPeriod of a Phylo following a given function
65 alterPhyloPeriods :: (PhyloPeriod -> PhyloPeriod) -> Phylo -> Phylo
66 alterPhyloPeriods f p = over ( phylo_periods
70 -- | To alter a list of PhyloLevels following a given function
71 alterPhyloLevels :: ([PhyloLevel] -> [PhyloLevel]) -> Phylo -> Phylo
72 alterPhyloLevels f p = over ( phylo_periods
74 . phylo_periodLevels) f p
77 -- | To append a list of PhyloPeriod to a Phylo
78 appendToPhyloPeriods :: [PhyloPeriod] -> Phylo -> Phylo
79 appendToPhyloPeriods l p = over (phylo_periods) (++ l) p
82 -- | Does a List of Sets contains at least one Set of an other List
83 doesAnySetContains :: Eq a => Set a -> [Set a] -> [Set a] -> Bool
84 doesAnySetContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h)) (l' ++ l)
87 -- | Does a list of A contains an other list of A
88 doesContains :: Eq a => [a] -> [a] -> Bool
91 | length l' > length l = False
92 | elem (head l') l = doesContains l (tail l')
96 -- | Does a list of ordered A contains an other list of ordered A
97 doesContainsOrd :: Eq a => Ord a => [a] -> [a] -> Bool
100 | last l < head l' = False
101 | head l' `elem` l = True
102 | otherwise = doesContainsOrd l (tail l')
105 -- | To filter the PhyloGroup of a Phylo according to a function and a value
106 filterGroups :: Eq a => (PhyloGroup -> a) -> a -> [PhyloGroup] -> [PhyloGroup]
107 filterGroups f x l = filter (\g -> (f g) == x) l
110 -- | To filter nested Sets of a
111 filterNestedSets :: Eq a => Set a -> [Set a] -> [Set a] -> [Set a]
112 filterNestedSets h l l'
113 | null l = if doesAnySetContains h l l'
116 | doesAnySetContains h l l' = filterNestedSets (head l) (tail l) l'
117 | otherwise = filterNestedSets (head l) (tail l) (h : l')
120 -- | To filter some GroupEdges with a given threshold
121 filterGroupEdges :: Double -> GroupEdges -> GroupEdges
122 filterGroupEdges thr edges = filter (\((s,t),w) -> w > thr) edges
125 -- | To get the PhyloBranchId of a PhyloBranch
126 getBranchId :: PhyloBranch -> PhyloBranchId
127 getBranchId b = b ^. phylo_branchId
130 -- | To get a list of PhyloBranchIds given a Level in a Phylo
131 getBranchIdsWith :: Level -> Phylo -> [PhyloBranchId]
132 getBranchIdsWith lvl p = sortOn snd
133 $ mapMaybe getGroupBranchId
134 $ getGroupsWithLevel lvl p
137 -- | To get the Meta value of a PhyloBranch
138 getBranchMeta :: Text -> PhyloBranch -> Double
139 getBranchMeta k b = (b ^. phylo_branchMeta) ! k
142 -- | To get the Name of a Clustering Methods
143 getClusterName :: QueryClustering -> Clustering
144 getClusterName c = _qc_name c
147 -- | To get the params of a Clustering Methods
148 getClusterPNum :: QueryClustering -> Text -> Double
149 getClusterPNum c k = if (member k $ _qc_pNum c)
150 then (_qc_pNum c) Map.! k
151 else panic "[ERR][Viz.Phylo.Tools.getClusterParam] the key is not in params"
154 -- | To get the boolean params of a Clustering Methods
155 getClusterPBool :: QueryClustering -> Text -> Bool
156 getClusterPBool c k = if (member k $ _qc_pBool c)
157 then (_qc_pBool c) Map.! k
158 else panic "[ERR][Viz.Phylo.Tools.getClusterParamBool] the key is not in paramsBool"
161 -- | To get a numeric param from a given QueryFilter
162 getFilterPNum :: QueryFilter -> Text -> Double
163 getFilterPNum f k = if (member k $ f ^. qf_pNum)
164 then (f ^. qf_pNum) Map.! k
165 else panic "[ERR][Viz.Phylo.Tools.getFilterPNum] the key is not in pNum"
168 -- | To get a boolean param from a given QueryFilter
169 getFilterPBool :: QueryFilter -> Text -> Bool
170 getFilterPBool f k = if (member k $ f ^. qf_pBool)
171 then (f ^. qf_pBool) Map.! k
172 else panic "[ERR][Viz.Phylo.Tools.getFilterPBool] the key is not in pBool"
175 -- | To get the first clustering method to apply to get the level 1 of a Phylo
176 getFstCluster :: PhyloQuery -> QueryClustering
177 getFstCluster q = q ^. q_fstCluster
180 -- | To get the foundations of a Phylo
181 getFoundations :: Phylo -> Vector Ngrams
182 getFoundations = _phylo_foundations
185 -- | To get the Index of a Ngrams in the Foundations of a Phylo
186 getIdxInFoundations :: Ngrams -> Phylo -> Int
187 getIdxInFoundations n p = case (elemIndex n (getFoundations p)) of
188 Nothing -> panic "[ERR][Viz.Phylo.Tools.getFoundationIdx] Ngrams not in Foundations"
192 -- | To maybe get the PhyloBranchId of a PhyloGroup
193 getGroupBranchId :: PhyloGroup -> Maybe PhyloBranchId
194 getGroupBranchId = _phylo_groupBranchId
197 -- | To get the PhyloGroups Childs of a PhyloGroup
198 getGroupChilds :: PhyloGroup -> Phylo -> [PhyloGroup]
199 getGroupChilds g p = getGroupsFromIds (getGroupPeriodChildsId g) p
202 -- | To get the id of a PhyloGroup
203 getGroupId :: PhyloGroup -> PhyloGroupId
204 getGroupId = _phylo_groupId
207 -- | To get the Cooc Matrix of a PhyloGroup
208 getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
209 getGroupCooc = _phylo_groupCooc
212 -- | To get the level out of the id of a PhyloGroup
213 getGroupLevel :: PhyloGroup -> Int
214 getGroupLevel = snd . fst . getGroupId
217 -- | To get the level child pointers of a PhyloGroup
218 getGroupLevelChilds :: PhyloGroup -> [Pointer]
219 getGroupLevelChilds = _phylo_groupLevelChilds
222 -- | To get the PhyloGroups Level Childs Ids of a PhyloGroup
223 getGroupLevelChildsId :: PhyloGroup -> [PhyloGroupId]
224 getGroupLevelChildsId g = map fst $ getGroupLevelChilds g
227 -- | To get the level parent pointers of a PhyloGroup
228 getGroupLevelParents :: PhyloGroup -> [Pointer]
229 getGroupLevelParents = _phylo_groupLevelParents
232 -- | To get the PhyloGroups Level Parents Ids of a PhyloGroup
233 getGroupLevelParentsId :: PhyloGroup -> [PhyloGroupId]
234 getGroupLevelParentsId g = map fst $ getGroupLevelParents g
237 -- | To get the Ngrams of a PhyloGroup
238 getGroupNgrams :: PhyloGroup -> [Int]
239 getGroupNgrams = _phylo_groupNgrams
242 -- | To get the list of pairs (Childs & Parents) of a PhyloGroup
243 getGroupPairs :: PhyloGroup -> Phylo -> [PhyloGroup]
244 getGroupPairs g p = (getGroupChilds g p) ++ (getGroupParents g p)
247 -- | To get the PhyloGroups Parents of a PhyloGroup
248 getGroupParents :: PhyloGroup -> Phylo -> [PhyloGroup]
249 getGroupParents g p = getGroupsFromIds (getGroupPeriodParentsId g) p
252 -- | To get the period out of the id of a PhyloGroup
253 getGroupPeriod :: PhyloGroup -> (Date,Date)
254 getGroupPeriod = fst . fst . getGroupId
257 -- | To get the period child pointers of a PhyloGroup
258 getGroupPeriodChilds :: PhyloGroup -> [Pointer]
259 getGroupPeriodChilds = _phylo_groupPeriodChilds
262 -- | To get the PhyloGroups Period Parents Ids of a PhyloGroup
263 getGroupPeriodChildsId :: PhyloGroup -> [PhyloGroupId]
264 getGroupPeriodChildsId g = map fst $ getGroupPeriodChilds g
267 -- | To get the period parent pointers of a PhyloGroup
268 getGroupPeriodParents :: PhyloGroup -> [Pointer]
269 getGroupPeriodParents = _phylo_groupPeriodParents
272 -- | To get the PhyloGroups Period Parents Ids of a PhyloGroup
273 getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId]
274 getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g
277 -- | To get all the PhyloGroup of a Phylo
278 getGroups :: Phylo -> [PhyloGroup]
279 getGroups = view ( phylo_periods
287 -- | To get all PhyloGroups matching a list of PhyloGroupIds in a Phylo
288 getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
289 getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
292 -- | To get the corresponding list of PhyloGroups from a list of PhyloNodes
293 getGroupsFromNodes :: [PhyloNode] -> Phylo -> [PhyloGroup]
294 getGroupsFromNodes ns p = getGroupsFromIds (map getNodeId ns) p
297 -- | To get all the PhyloGroup of a Phylo with a given level and period
298 getGroupsWithFilters :: Int -> (Date,Date) -> Phylo -> [PhyloGroup]
299 getGroupsWithFilters lvl prd p = (getGroupsWithLevel lvl p)
301 (getGroupsWithPeriod prd p)
304 -- | To get all the PhyloGroup of a Phylo with a given Level
305 getGroupsWithLevel :: Int -> Phylo -> [PhyloGroup]
306 getGroupsWithLevel lvl p = filterGroups getGroupLevel lvl (getGroups p)
309 -- | To get all the PhyloGroup of a Phylo with a given Period
310 getGroupsWithPeriod :: (Date,Date) -> Phylo -> [PhyloGroup]
311 getGroupsWithPeriod prd p = filterGroups getGroupPeriod prd (getGroups p)
314 -- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
315 getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
316 getKeyPair (x,y) m = case findPair (x,y) m of
317 Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
320 --------------------------------------
321 findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
323 | member (x,y) m = Just (x,y)
324 | member (y,x) m = Just (y,x)
325 | otherwise = Nothing
326 --------------------------------------
329 -- | To get the last computed Level in a Phylo
330 getLastLevel :: Phylo -> Level
331 getLastLevel p = (last . sort)
332 $ map (snd . getPhyloLevelId)
333 $ view ( phylo_periods
335 . phylo_periodLevels ) p
339 -- | To get the neighbours (directed/undirected) of a PhyloGroup from a list of GroupEdges
340 getNeighbours :: Bool -> PhyloGroup -> GroupEdges -> [PhyloGroup]
341 getNeighbours directed g e = case directed of
342 True -> map (\((s,t),w) -> t)
343 $ filter (\((s,t),w) -> s == g) e
344 False -> map (\((s,t),w) -> head $ delete g $ nub [s,t,g])
345 $ filter (\((s,t),w) -> s == g || t == g) e
348 -- | To get the PhyloBranchId of PhyloNode if it exists
349 getNodeBranchId :: PhyloNode -> PhyloBranchId
350 getNodeBranchId n = case n ^. phylo_nodeBranchId of
351 Nothing -> panic "[ERR][Viz.Phylo.Tools.getNodeBranchId] branchId not found"
355 -- | To get the PhyloGroupId of a PhyloNode
356 getNodeId :: PhyloNode -> PhyloGroupId
357 getNodeId n = n ^. phylo_nodeId
360 -- | To get the Level of a PhyloNode
361 getNodeLevel :: PhyloNode -> Level
362 getNodeLevel n = (snd . fst) $ getNodeId n
365 -- | To get the Parent Node of a PhyloNode in a PhyloView
366 getNodeParent :: PhyloNode -> PhyloView -> PhyloNode
367 getNodeParent n v = head
368 $ filter (\n' -> getNodeId n' == getNodeParentId n)
369 $ v ^. phylo_viewNodes
372 -- | To get the Parent Node id of a PhyloNode if it exists
373 getNodeParentId :: PhyloNode -> PhyloGroupId
374 getNodeParentId n = case n ^. phylo_nodeParent of
375 Nothing -> panic "[ERR][Viz.Phylo.Tools.getNodeParentId] node parent not found"
379 -- | To get a list of PhyloNodes grouped by PhyloBranch in a PhyloView
380 getNodesByBranches :: PhyloView -> [(PhyloBranchId,[PhyloNode])]
381 getNodesByBranches v = zip bIds $ map (\id -> filter (\n -> (getNodeBranchId n) == id)
382 $ getNodesInBranches v ) bIds
384 --------------------------------------
385 bIds :: [PhyloBranchId]
386 bIds = getViewBranchIds v
387 --------------------------------------
390 -- | To get a list of PhyloNodes owned by any PhyloBranches in a PhyloView
391 getNodesInBranches :: PhyloView -> [PhyloNode]
392 getNodesInBranches v = filter (\n -> isJust $ n ^. phylo_nodeBranchId)
393 $ v ^. phylo_viewNodes
396 -- | To get the cluster methods to apply to the Nths levels of a Phylo
397 getNthCluster :: PhyloQuery -> QueryClustering
398 getNthCluster q = q ^. q_nthCluster
401 -- | To get the Sup Level of a reconstruction of a Phylo from a PhyloQuery
402 getNthLevel :: PhyloQuery -> Level
403 getNthLevel q = q ^. q_nthLevel
406 -- | To get the PhylolevelId of a given PhyloLevel
407 getPhyloLevelId :: PhyloLevel -> PhyloLevelId
408 getPhyloLevelId = _phylo_levelId
411 -- | To get all the Phylolevels of a given PhyloPeriod
412 getPhyloLevels :: PhyloPeriod -> [PhyloLevel]
413 getPhyloLevels = view (phylo_periodLevels)
416 -- | To get all the PhyloPeriodIds of a Phylo
417 getPhyloPeriods :: Phylo -> [PhyloPeriodId]
418 getPhyloPeriods p = map _phylo_periodId
419 $ view (phylo_periods) p
422 -- | To get the id of a given PhyloPeriod
423 getPhyloPeriodId :: PhyloPeriod -> PhyloPeriodId
424 getPhyloPeriodId prd = _phylo_periodId prd
427 -- | To get the sensibility of a Proximity if it exists
428 getSensibility :: QueryProximity -> Double
429 getSensibility prox = if (member "sensibility" $ prox ^. qp_pNum)
430 then (prox ^. qp_pNum) ! "sensibility"
431 else panic "[ERR][Viz.Phylo.Tools.getSensibility] sensibility not in params"
434 -- | To get the PhyloGroupId of the Source of a PhyloEdge
435 getSourceId :: PhyloEdge -> PhyloGroupId
436 getSourceId e = e ^. phylo_edgeSource
439 -- | To get the PhyloGroupId of the Target of a PhyloEdge
440 getTargetId :: PhyloEdge -> PhyloGroupId
441 getTargetId e = e ^. phylo_edgeTarget
444 -- | To get the Grain of the PhyloPeriods from a PhyloQuery
445 getPeriodGrain :: PhyloQuery -> Int
446 getPeriodGrain q = q ^. q_periodGrain
449 -- | To get the intertemporal matching strategy to apply to a Phylo from a PhyloQuery
450 getInterTemporalMatching :: PhyloQuery -> QueryProximity
451 getInterTemporalMatching q = q ^. q_interTemporalMatching
454 -- | To get the Steps of the PhyloPeriods from a PhyloQuery
455 getPeriodSteps :: PhyloQuery -> Int
456 getPeriodSteps q = q ^. q_periodSteps
459 -- | To get all the PhyloBranchIds of a PhyloView
460 getViewBranchIds :: PhyloView -> [PhyloBranchId]
461 getViewBranchIds v = map getBranchId $ v ^. phylo_viewBranches
464 -- | To init the foundation of the Phylo as a Vector of Ngrams
465 initFoundations :: [Ngrams] -> Vector Ngrams
466 initFoundations l = Vector.fromList $ map toLower l
469 -- | To create a PhyloGroup in a Phylo out of a list of Ngrams and a set of parameters
470 initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
471 initGroup ngrams lbl idx lvl from to p = PhyloGroup
472 (((from, to), lvl), idx)
474 (sort $ map (\x -> getIdxInFoundations x p) ngrams)
481 -- | To init the Base of a Phylo from a List of Periods and Foundations
482 initPhyloBase :: [(Date, Date)] -> Vector Ngrams -> Phylo
483 initPhyloBase pds fds = Phylo ((fst . head) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds)
486 -- | To create a PhyloLevel
487 initPhyloLevel :: PhyloLevelId -> [PhyloGroup] -> PhyloLevel
488 initPhyloLevel id groups = PhyloLevel id groups
491 -- | To create a PhyloPeriod
492 initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
493 initPhyloPeriod id l = PhyloPeriod id l
496 -- | To filter Fis with small Support but by keeping non empty Periods
497 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
498 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
499 then keepFilled f (thr - 1) l
503 -- | To get all combinations of a list
504 listToDirectedCombi :: Eq a => [a] -> [(a,a)]
505 listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y]
508 -- | To get all combinations of a list and apply a function to the resulting list of pairs
509 listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)]
510 listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y]
513 -- | To get all combinations of a list with no repetition
514 listToUnDirectedCombi :: [a] -> [(a,a)]
515 listToUnDirectedCombi l = [ (x,y) | (x:rest) <- tails l, y <- rest ]
518 -- | To get all combinations of a list with no repetition and apply a function to the resulting list of pairs
519 listToUnDirectedCombiWith :: forall a b. (a -> b) -> [a] -> [(b,b)]
520 listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
523 -- | To set the LevelId of a PhyloLevel and of all its PhyloGroups
524 setPhyloLevelId :: Int -> PhyloLevel -> PhyloLevel
525 setPhyloLevelId lvl' (PhyloLevel (id, lvl) groups)
526 = PhyloLevel (id, lvl') groups'
528 groups' = over (traverse . phylo_groupId) (\((period, lvl), idx) -> ((period, lvl'), idx)) groups
531 -- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
532 unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
533 unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2