]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloTools.hs
phyloQuality
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
16
17 module Gargantext.Viz.Phylo.PhyloTools where
18
19 import Data.Vector (Vector, elemIndex)
20 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub)
21 import Data.Set (Set, size)
22 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey)
23 import Data.String (String)
24
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27
28 import Debug.Trace (trace)
29 import Control.Lens hiding (Level)
30
31 import qualified Data.Vector as Vector
32 import qualified Data.List as List
33 import qualified Data.Set as Set
34
35 --------------
36 -- | Misc | --
37 --------------
38
39
40 countSup :: Double -> [Double] -> Int
41 countSup s l = length $ filter (>s) l
42
43
44 elemIndex' :: Eq a => a -> [a] -> Int
45 elemIndex' e l = case (List.elemIndex e l) of
46 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
47 Just i -> i
48
49
50 ---------------------
51 -- | Foundations | --
52 ---------------------
53
54
55 -- | Is this Ngrams a Foundations Root ?
56 isRoots :: Ngrams -> Vector Ngrams -> Bool
57 isRoots n ns = Vector.elem n ns
58
59 -- | To transform a list of nrams into a list of foundation's index
60 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
61 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
62
63
64 --------------
65 -- | Time | --
66 --------------
67
68 -- | To transform a list of periods into a set of Dates
69 periodsToYears :: [(Date,Date)] -> Set Date
70 periodsToYears periods = (Set.fromList . sort . concat)
71 $ map (\(d,d') -> [d..d']) periods
72
73
74 findBounds :: [Date] -> (Date,Date)
75 findBounds dates =
76 let dates' = sort dates
77 in (head' "findBounds" dates', last' "findBounds" dates')
78
79
80 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
81 toPeriods dates p s =
82 let (start,end) = findBounds dates
83 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
84 $ chunkAlong p s [start .. end]
85
86
87 -- | Get a regular & ascendante timeScale from a given list of dates
88 toTimeScale :: [Date] -> Int -> [Date]
89 toTimeScale dates step =
90 let (start,end) = findBounds dates
91 in [start, (start + step) .. end]
92
93
94 getTimeStep :: TimeUnit -> Int
95 getTimeStep time = case time of
96 Year _ s _ -> s
97
98 getTimePeriod :: TimeUnit -> Int
99 getTimePeriod time = case time of
100 Year p _ _ -> p
101
102 getTimeFrame :: TimeUnit -> Int
103 getTimeFrame time = case time of
104 Year _ _ f -> f
105
106 -------------
107 -- | Fis | --
108 -------------
109
110
111 -- | To find if l' is nested in l
112 isNested :: Eq a => [a] -> [a] -> Bool
113 isNested l l'
114 | null l' = True
115 | length l' > length l = False
116 | (union l l') == l = True
117 | otherwise = False
118
119
120 -- | To filter Fis with small Support but by keeping non empty Periods
121 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
122 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
123 then keepFilled f (thr - 1) l
124 else f thr l
125
126
127 traceClique :: Map (Date, Date) [PhyloFis] -> String
128 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
129 where
130 --------------------------------------
131 cliques :: [Double]
132 cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
133 --------------------------------------
134
135
136 traceSupport :: Map (Date, Date) [PhyloFis] -> String
137 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
138 where
139 --------------------------------------
140 supports :: [Double]
141 supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
142 --------------------------------------
143
144
145 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
146 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
147 <> "Support : " <> (traceSupport mFis) <> "\n"
148 <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
149
150
151 -------------------------
152 -- | Contextual unit | --
153 -------------------------
154
155
156 getFisSupport :: ContextualUnit -> Int
157 getFisSupport unit = case unit of
158 Fis s _ -> s
159 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
160
161 getFisSize :: ContextualUnit -> Int
162 getFisSize unit = case unit of
163 Fis _ s -> s
164 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
165
166
167 --------------
168 -- | Cooc | --
169 --------------
170
171
172 listToCombi' :: [a] -> [(a,a)]
173 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
174
175 listToEqual' :: Eq a => [a] -> [(a,a)]
176 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
177
178 listToKeys :: Eq a => [a] -> [(a,a)]
179 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
180
181 listToMatrix :: [Int] -> Map (Int,Int) Double
182 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
183
184 sumCooc :: Cooc -> Cooc -> Cooc
185 sumCooc cooc cooc' = unionWith (+) cooc cooc'
186
187 getTrace :: Cooc -> Double
188 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
189
190 --------------------
191 -- | PhyloGroup | --
192 --------------------
193
194 getGroupId :: PhyloGroup -> PhyloGroupId
195 getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
196
197 ---------------
198 -- | Phylo | --
199 ---------------
200
201 addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
202 addPointers group fil pty pointers =
203 case pty of
204 TemporalPointer -> case fil of
205 ToChilds -> group & phylo_groupPeriodChilds %~ (++ pointers)
206 ToParents -> group & phylo_groupPeriodParents %~ (++ pointers)
207 LevelPointer -> case fil of
208 ToChilds -> group & phylo_groupLevelChilds %~ (++ pointers)
209 ToParents -> group & phylo_groupLevelParents %~ (++ pointers)
210
211
212 getPeriodIds :: Phylo -> [(Date,Date)]
213 getPeriodIds phylo = sortOn fst
214 $ keys
215 $ phylo ^. phylo_periods
216
217
218 getConfig :: Phylo -> Config
219 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
220
221
222 getRoots :: Phylo -> Vector Ngrams
223 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
224
225
226 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
227 getGroupsFromLevel lvl phylo =
228 elems $ view ( phylo_periods
229 . traverse
230 . phylo_periodLevels
231 . traverse
232 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
233 . phylo_levelGroups ) phylo
234
235
236 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
237 updatePhyloGroups lvl m phylo =
238 over ( phylo_periods
239 . traverse
240 . phylo_periodLevels
241 . traverse
242 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
243 . phylo_levelGroups
244 . traverse
245 ) (\group ->
246 let id = getGroupId group
247 in
248 if member id m
249 then m ! id
250 else group ) phylo
251
252
253 ------------------
254 -- | Pointers | --
255 ------------------
256
257
258 pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
259 pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
260
261
262 -------------------
263 -- | Proximity | --
264 -------------------
265
266 getSensibility :: Proximity -> Double
267 getSensibility proxi = case proxi of
268 WeightedLogJaccard s _ _ -> s
269 Hamming -> undefined
270
271 getThresholdInit :: Proximity -> Double
272 getThresholdInit proxi = case proxi of
273 WeightedLogJaccard _ t _ -> t
274 Hamming -> undefined
275
276 getThresholdStep :: Proximity -> Double
277 getThresholdStep proxi = case proxi of
278 WeightedLogJaccard _ _ s -> s
279 Hamming -> undefined
280
281
282 ----------------
283 -- | Branch | --
284 ----------------
285
286 ngramsInBranches :: [[PhyloGroup]] -> [Int]
287 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches