]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloTools.hs
ready for quality score
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE FlexibleContexts #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE RankNTypes #-}
15 {-# LANGUAGE ViewPatterns #-}
16
17 module Gargantext.Viz.Phylo.PhyloTools where
18
19 import Data.Vector (Vector, elemIndex)
20 import Data.List (sort, concat, null, union, (++), tails, sortOn)
21 import Data.Set (Set, size)
22 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!))
23 import Data.String (String)
24
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27
28 import Debug.Trace (trace)
29 import Control.Lens hiding (Level)
30
31 import qualified Data.Vector as Vector
32 import qualified Data.List as List
33 import qualified Data.Set as Set
34
35 --------------
36 -- | Misc | --
37 --------------
38
39
40 countSup :: Double -> [Double] -> Int
41 countSup s l = length $ filter (>s) l
42
43
44 elemIndex' :: Eq a => a -> [a] -> Int
45 elemIndex' e l = case (List.elemIndex e l) of
46 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
47 Just i -> i
48
49
50 ---------------------
51 -- | Foundations | --
52 ---------------------
53
54
55 -- | Is this Ngrams a Foundations Root ?
56 isRoots :: Ngrams -> Vector Ngrams -> Bool
57 isRoots n ns = Vector.elem n ns
58
59 -- | To transform a list of nrams into a list of foundation's index
60 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
61 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
62
63
64 --------------
65 -- | Time | --
66 --------------
67
68 -- | To transform a list of periods into a set of Dates
69 periodsToYears :: [(Date,Date)] -> Set Date
70 periodsToYears periods = (Set.fromList . sort . concat)
71 $ map (\(d,d') -> [d..d']) periods
72
73
74 findBounds :: [Date] -> (Date,Date)
75 findBounds dates =
76 let dates' = sort dates
77 in (head' "findBounds" dates', last' "findBounds" dates')
78
79
80 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
81 toPeriods dates p s =
82 let (start,end) = findBounds dates
83 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
84 $ chunkAlong p s [start .. end]
85
86
87 -- | Get a regular & ascendante timeScale from a given list of dates
88 toTimeScale :: [Date] -> Int -> [Date]
89 toTimeScale dates step =
90 let (start,end) = findBounds dates
91 in [start, (start + step) .. end]
92
93
94 getTimeStep :: TimeUnit -> Int
95 getTimeStep time = case time of
96 Year _ s _ -> s
97
98 getTimePeriod :: TimeUnit -> Int
99 getTimePeriod time = case time of
100 Year p _ _ -> p
101
102 getTimeFrame :: TimeUnit -> Int
103 getTimeFrame time = case time of
104 Year _ _ f -> f
105
106 -------------
107 -- | Fis | --
108 -------------
109
110
111 -- | To find if l' is nested in l
112 isNested :: Eq a => [a] -> [a] -> Bool
113 isNested l l'
114 | null l' = True
115 | length l' > length l = False
116 | (union l l') == l = True
117 | otherwise = False
118
119
120 -- | To filter Fis with small Support but by keeping non empty Periods
121 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
122 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
123 then keepFilled f (thr - 1) l
124 else f thr l
125
126
127 traceClique :: Map (Date, Date) [PhyloFis] -> String
128 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
129 where
130 --------------------------------------
131 cliques :: [Double]
132 cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
133 --------------------------------------
134
135
136 traceSupport :: Map (Date, Date) [PhyloFis] -> String
137 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
138 where
139 --------------------------------------
140 supports :: [Double]
141 supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
142 --------------------------------------
143
144
145 traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
146 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
147 <> "Support : " <> (traceSupport mFis) <> "\n"
148 <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
149
150
151 -------------------------
152 -- | Contextual unit | --
153 -------------------------
154
155
156 getFisSupport :: ContextualUnit -> Int
157 getFisSupport unit = case unit of
158 Fis s _ -> s
159 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
160
161 getFisSize :: ContextualUnit -> Int
162 getFisSize unit = case unit of
163 Fis _ s -> s
164 -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
165
166
167 --------------
168 -- | Cooc | --
169 --------------
170
171
172 listToCombi' :: [a] -> [(a,a)]
173 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
174
175 listToEqual' :: Eq a => [a] -> [(a,a)]
176 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
177
178 listToKeys :: Eq a => [a] -> [(a,a)]
179 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
180
181 listToMatrix :: [Int] -> Map (Int,Int) Double
182 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
183
184 sumCooc :: Cooc -> Cooc -> Cooc
185 sumCooc cooc cooc' = unionWith (+) cooc cooc'
186
187 --------------------
188 -- | PhyloGroup | --
189 --------------------
190
191 getGroupId :: PhyloGroup -> PhyloGroupId
192 getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
193
194 ---------------
195 -- | Phylo | --
196 ---------------
197
198 addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
199 addPointers group fil pty pointers =
200 case pty of
201 TemporalPointer -> case fil of
202 ToChilds -> group & phylo_groupPeriodChilds %~ (++ pointers)
203 ToParents -> group & phylo_groupPeriodParents %~ (++ pointers)
204 LevelPointer -> case fil of
205 ToChilds -> group & phylo_groupLevelChilds %~ (++ pointers)
206 ToParents -> group & phylo_groupLevelParents %~ (++ pointers)
207
208
209 getPeriodIds :: Phylo -> [(Date,Date)]
210 getPeriodIds phylo = sortOn fst
211 $ keys
212 $ phylo ^. phylo_periods
213
214
215 getConfig :: Phylo -> Config
216 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
217
218
219 getRoots :: Phylo -> Vector Ngrams
220 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
221
222
223 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
224 getGroupsFromLevel lvl phylo =
225 elems $ view ( phylo_periods
226 . traverse
227 . phylo_periodLevels
228 . traverse
229 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
230 . phylo_levelGroups ) phylo
231
232
233 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
234 updatePhyloGroups lvl m phylo =
235 over ( phylo_periods
236 . traverse
237 . phylo_periodLevels
238 . traverse
239 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
240 . phylo_levelGroups
241 . traverse
242 ) (\group ->
243 let id = getGroupId group
244 in
245 if member id m
246 then m ! id
247 else group ) phylo
248
249
250 ------------------
251 -- | Pointers | --
252 ------------------
253
254
255 pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
256 pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
257
258 -- mergeLinks :: [Link] -> [Link] -> [Link]
259 -- mergeLinks toChilds toParents =
260 -- let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
261 -- in toList $ unionWith max (fromList toParents) toChilds'
262
263
264 -------------------
265 -- | Proximity | --
266 -------------------
267
268 getSensibility :: Proximity -> Double
269 getSensibility proxi = case proxi of
270 WeightedLogJaccard s _ _ -> s
271 Hamming -> undefined
272
273 getThresholdInit :: Proximity -> Double
274 getThresholdInit proxi = case proxi of
275 WeightedLogJaccard _ t _ -> t
276 Hamming -> undefined
277
278 getThresholdStep :: Proximity -> Double
279 getThresholdStep proxi = case proxi of
280 WeightedLogJaccard _ _ s -> s
281 Hamming -> undefined