]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloTools.hs
export done
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
16
17 module Gargantext.Viz.Phylo.PhyloTools where
18
19 import Data.Vector (Vector, elemIndex)
20 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail)
21 import Data.Set (Set, size)
22 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey)
23 import Data.String (String)
24 import Data.Text (Text, unwords)
25
26 import Gargantext.Prelude
27 import Gargantext.Viz.AdaptativePhylo
28
29 import Debug.Trace (trace)
30 import Control.Lens hiding (Level)
31
32 import qualified Data.Vector as Vector
33 import qualified Data.List as List
34 import qualified Data.Set as Set
35
36 --------------
37 -- | Misc | --
38 --------------
39
40
41 countSup :: Double -> [Double] -> Int
42 countSup s l = length $ filter (>s) l
43
44
45 elemIndex' :: Eq a => a -> [a] -> Int
46 elemIndex' e l = case (List.elemIndex e l) of
47 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
48 Just i -> i
49
50
51 ---------------------
52 -- | Foundations | --
53 ---------------------
54
55
56 -- | Is this Ngrams a Foundations Root ?
57 isRoots :: Ngrams -> Vector Ngrams -> Bool
58 isRoots n ns = Vector.elem n ns
59
60 -- | To transform a list of nrams into a list of foundation's index
61 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
62 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
63
64 -- | To transform a list of Ngrams Indexes into a Label
65 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
66 ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
67
68
69 -- | To transform a list of Ngrams Indexes into a list of Text
70 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
71 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
72
73
74 --------------
75 -- | Time | --
76 --------------
77
78 -- | To transform a list of periods into a set of Dates
79 periodsToYears :: [(Date,Date)] -> Set Date
80 periodsToYears periods = (Set.fromList . sort . concat)
81 $ map (\(d,d') -> [d..d']) periods
82
83
84 findBounds :: [Date] -> (Date,Date)
85 findBounds dates =
86 let dates' = sort dates
87 in (head' "findBounds" dates', last' "findBounds" dates')
88
89
90 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
91 toPeriods dates p s =
92 let (start,end) = findBounds dates
93 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
94 $ chunkAlong p s [start .. end]
95
96
97 -- | Get a regular & ascendante timeScale from a given list of dates
98 toTimeScale :: [Date] -> Int -> [Date]
99 toTimeScale dates step =
100 let (start,end) = findBounds dates
101 in [start, (start + step) .. end]
102
103
104 getTimeStep :: TimeUnit -> Int
105 getTimeStep time = case time of
106 Year _ s _ -> s
107
108 getTimePeriod :: TimeUnit -> Int
109 getTimePeriod time = case time of
110 Year p _ _ -> p
111
112 getTimeFrame :: TimeUnit -> Int
113 getTimeFrame time = case time of
114 Year _ _ f -> f
115
116 -------------
117 -- | Fis | --
118 -------------
119
120
121 -- | To find if l' is nested in l
122 isNested :: Eq a => [a] -> [a] -> Bool
123 isNested l l'
124 | null l' = True
125 | length l' > length l = False
126 | (union l l') == l = True
127 | otherwise = False
128
129
130 -- | To filter Fis with small Support but by keeping non empty Periods
131 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
132 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
133 then keepFilled f (thr - 1) l
134 else f thr l
135
136
137 traceClique :: Map (Date, Date) [PhyloFis] -> String
138 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
139 where
140 --------------------------------------
141 cliques :: [Double]
142 cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
143 --------------------------------------
144
145
146 traceSupport :: Map (Date, Date) [PhyloFis] -> String
147 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
148 where
149 --------------------------------------
150 supports :: [Double]
151 supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
152 --------------------------------------
153
154
155 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
156 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
157 <> "Support : " <> (traceSupport mFis) <> "\n"
158 <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
159
160
161 -------------------------
162 -- | Contextual unit | --
163 -------------------------
164
165
166 getFisSupport :: ContextualUnit -> Int
167 getFisSupport unit = case unit of
168 Fis s _ -> s
169 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
170
171 getFisSize :: ContextualUnit -> Int
172 getFisSize unit = case unit of
173 Fis _ s -> s
174 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
175
176
177 --------------
178 -- | Cooc | --
179 --------------
180
181
182 listToCombi' :: [a] -> [(a,a)]
183 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
184
185 listToEqual' :: Eq a => [a] -> [(a,a)]
186 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
187
188 listToKeys :: Eq a => [a] -> [(a,a)]
189 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
190
191 listToMatrix :: [Int] -> Map (Int,Int) Double
192 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
193
194 sumCooc :: Cooc -> Cooc -> Cooc
195 sumCooc cooc cooc' = unionWith (+) cooc cooc'
196
197 getTrace :: Cooc -> Double
198 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
199
200 --------------------
201 -- | PhyloGroup | --
202 --------------------
203
204 getGroupId :: PhyloGroup -> PhyloGroupId
205 getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
206
207 ---------------
208 -- | Phylo | --
209 ---------------
210
211 addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
212 addPointers group fil pty pointers =
213 case pty of
214 TemporalPointer -> case fil of
215 ToChilds -> group & phylo_groupPeriodChilds .~ pointers
216 ToParents -> group & phylo_groupPeriodParents .~ pointers
217 LevelPointer -> case fil of
218 ToChilds -> group & phylo_groupLevelChilds .~ pointers
219 ToParents -> group & phylo_groupLevelParents .~ pointers
220
221
222 getPeriodIds :: Phylo -> [(Date,Date)]
223 getPeriodIds phylo = sortOn fst
224 $ keys
225 $ phylo ^. phylo_periods
226
227
228 getConfig :: Phylo -> Config
229 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
230
231
232 getRoots :: Phylo -> Vector Ngrams
233 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
234
235
236 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
237 getGroupsFromLevel lvl phylo =
238 elems $ view ( phylo_periods
239 . traverse
240 . phylo_periodLevels
241 . traverse
242 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
243 . phylo_levelGroups ) phylo
244
245
246 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
247 updatePhyloGroups lvl m phylo =
248 over ( phylo_periods
249 . traverse
250 . phylo_periodLevels
251 . traverse
252 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
253 . phylo_levelGroups
254 . traverse
255 ) (\group ->
256 let id = getGroupId group
257 in
258 if member id m
259 then m ! id
260 else group ) phylo
261
262
263 -------------------
264 -- | Proximity | --
265 -------------------
266
267 getSensibility :: Proximity -> Double
268 getSensibility proxi = case proxi of
269 WeightedLogJaccard s _ _ -> s
270 Hamming -> undefined
271
272 getThresholdInit :: Proximity -> Double
273 getThresholdInit proxi = case proxi of
274 WeightedLogJaccard _ t _ -> t
275 Hamming -> undefined
276
277 getThresholdStep :: Proximity -> Double
278 getThresholdStep proxi = case proxi of
279 WeightedLogJaccard _ _ s -> s
280 Hamming -> undefined
281
282
283 ----------------
284 -- | Branch | --
285 ----------------
286
287 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
288 intersectInit acc lst lst' =
289 if (null lst) || (null lst')
290 then acc
291 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
292 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
293 else acc
294
295 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
296 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
297
298 ngramsInBranches :: [[PhyloGroup]] -> [Int]
299 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
300
301
302 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
303 traceMatchSuccess thr qua qua' nextBranches =
304 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
305 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
306 <> ",(1.." <> show (length nextBranches) <> ")]"
307 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
308 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
309 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
310
311
312 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
313 traceMatchFailure thr qua qua' branches =
314 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
315 <> ",(1.." <> show (length branches) <> ")]"
316 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
317 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
318 ) branches
319
320
321 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
322 traceMatchNoSplit branches =
323 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
324 <> ",(1.." <> show (length branches) <> ")]"
325 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
326 <> " - unable to split in smaller branches" <> "\n"
327 ) branches
328
329
330 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
331 traceMatchLimit branches =
332 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
333 <> ",(1.." <> show (length branches) <> ")]"
334 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
335 <> " - unable to increase the threshold above 1" <> "\n"
336 ) branches
337
338
339 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
340 traceMatchEnd groups =
341 trace ("\n" <> "-- | End of temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
342 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups