]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
working on perf
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
17
18 module Gargantext.Viz.Phylo.PhyloExport where
19
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
23
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
28
29 import Control.Lens
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
37
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
41
42 --------------------
43 -- | Dot export | --
44 --------------------
45
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
48
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
51
52 dynamicToColor :: Double -> H.Attribute
53 dynamicToColor d
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
58
59 pickLabelColor :: [Double] -> H.Attribute
60 pickLabelColor lst
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
65
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
68
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
71
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
74
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
77
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
80
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
83
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
94 where
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
100 header :: H.Row
101 header =
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
110
111 branchToDotNode :: PhyloBranch -> Dot DotId
112 branchToDotNode b =
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ show (snd $ b ^. branch_id)) ])
118
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
126
127
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ show (snd $ g ^. phylo_groupBranchId))])
136
137
138 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
139 toDotEdge source target lbl edgeType = edge source target
140 (case edgeType of
141 GroupToGroup -> [ Width 2, Color [toWColor Black], Constraint True
142 , Label (StrLabel $ fromStrict lbl)]
143 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
144 , Label (StrLabel $ fromStrict lbl)]
145 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
146 , Label (StrLabel $ fromStrict lbl)]
147 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
148
149
150 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
151 mergePointers groups =
152 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
153 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
154 in unionWith (\w w' -> max w w') toChilds toParents
155
156
157 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
158 exportToDot phylo export =
159 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
160
161 -- | 1) init the dot graph
162 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
163 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
164 , Ratio FillRatio
165 , Style [SItem Filled []],Color [toWColor White]])
166
167 -- | 2) create a layer for the branches labels
168 subgraph (Str "Branches peaks") $ do
169
170 graphAttrs [Rank SameRank]
171
172 -- | 3) group the branches by hierarchy
173 mapM (\branches ->
174 subgraph (Str "Branches clade") $ do
175 graphAttrs [Rank SameRank]
176
177 -- | 4) create a node for each branch
178 mapM branchToDotNode branches
179 ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
180
181 -- | 5) create a layer for each period
182 _ <- mapM (\period ->
183 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
184 graphAttrs [Rank SameRank]
185 periodToDotNode period
186
187 -- | 6) create a node for each group
188 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
189 ) $ getPeriodIds phylo
190
191 -- | 7) create the edges between a branch and its first groups
192 _ <- mapM (\(bId,groups) ->
193 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
194 )
195 $ toList
196 $ map (\groups -> head' "toDot"
197 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
198 $ sortOn (fst . _phylo_groupPeriod) groups)
199 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
200
201 -- | 8) create the edges between the groups
202 _ <- mapM (\((k,k'),_) ->
203 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
204 ) $ (toList . mergePointers) $ export ^. export_groups
205
206 -- | 7) create the edges between the periods
207 _ <- mapM (\(prd,prd') ->
208 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
209 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
210
211 -- | 8) create the edges between the branches
212 _ <- mapM (\(bId,bId') ->
213 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
214 (Text.pack $ show(branchIdsToProximity bId bId'
215 (getThresholdInit $ phyloProximity $ getConfig phylo)
216 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
217 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
218
219
220 graphAttrs [Rank SameRank]
221
222
223
224
225
226 ----------------
227 -- | Filter | --
228 ----------------
229
230 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
231 filterByBranchSize thr export =
232 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
233 in export & export_branches .~ (fst branches')
234 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
235
236
237 processFilters :: [Filter] -> PhyloExport -> PhyloExport
238 processFilters filters export =
239 foldl (\export' f -> case f of
240 ByBranchSize thr -> filterByBranchSize thr export'
241 ) export filters
242
243 --------------
244 -- | Sort | --
245 --------------
246
247 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
248 sortByHierarchy depth branches =
249 if (length branches == 1)
250 then branches
251 else concat
252 $ map (\branches' ->
253 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
254 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
255 ++ (sortByHierarchy (depth + 1) (snd partitions)))
256 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
257 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
258
259
260 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
261 sortByBirthDate order export =
262 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
263 branches' = case order of
264 Asc -> branches
265 Desc -> reverse branches
266 in export & export_branches .~ branches'
267
268 processSort :: Sort -> PhyloExport -> PhyloExport
269 processSort sort' export = case sort' of
270 ByBirthDate o -> sortByBirthDate o export
271 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
272
273
274 -----------------
275 -- | Metrics | --
276 -----------------
277
278 -- | Return the conditional probability of i knowing j
279 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
280 conditional m i j = (findWithDefault 0 (i,j) m)
281 / (m ! (j,j))
282
283
284 -- | Return the genericity score of a given ngram
285 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
286 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
287 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
288
289
290 -- | Return the specificity score of a given ngram
291 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
292 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
293 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
294
295
296 -- | Return the inclusion score of a given ngram
297 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
298 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
299 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
300
301
302 ngramsMetrics :: PhyloExport -> PhyloExport
303 ngramsMetrics export =
304 over ( export_groups
305 . traverse )
306 (\g -> g & phylo_groupMeta %~ insert "genericity"
307 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
308 & phylo_groupMeta %~ insert "specificity"
309 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
310 & phylo_groupMeta %~ insert "inclusion"
311 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
312 ) export
313
314
315 branchDating :: PhyloExport -> PhyloExport
316 branchDating export =
317 over ( export_branches
318 . traverse )
319 (\b ->
320 let groups = sortOn fst
321 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
322 then acc ++ [g ^. phylo_groupPeriod]
323 else acc ) [] $ export ^. export_groups
324 birth = fst $ head' "birth" groups
325 age = (snd $ last' "age" groups) - birth
326 in b & branch_meta %~ insert "birth" [fromIntegral birth]
327 & branch_meta %~ insert "age" [fromIntegral age]
328 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
329
330 processMetrics :: PhyloExport -> PhyloExport
331 processMetrics export = ngramsMetrics
332 $ branchDating export
333
334
335 -----------------
336 -- | Taggers | --
337 -----------------
338
339 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
340 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
341 $ take nth
342 $ reverse
343 $ sortOn snd $ zip [0..] meta
344
345
346 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
347 mostInclusive nth foundations export =
348 over ( export_branches
349 . traverse )
350 (\b ->
351 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
352 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
353 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
354 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
355 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
356 in b & branch_label .~ lbl ) export
357
358
359 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
360 mostEmergentInclusive nth foundations export =
361 over ( export_groups
362 . traverse )
363 (\g ->
364 let lbl = ngramsToLabel foundations
365 $ take nth
366 $ map (\(_,(_,idx)) -> idx)
367 $ concat
368 $ map (\groups -> sortOn (fst . snd) groups)
369 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
370 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
371 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
372 in g & phylo_groupLabel .~ lbl ) export
373
374
375 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
376 processLabels labels foundations export =
377 foldl (\export' label ->
378 case label of
379 GroupLabel tagger nth ->
380 case tagger of
381 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
382 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
383 BranchLabel tagger nth ->
384 case tagger of
385 MostInclusive -> mostInclusive nth foundations export'
386 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
387
388
389 ------------------
390 -- | Dynamics | --
391 ------------------
392
393
394 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
395 toDynamics n parents group m =
396 let prd = group ^. phylo_groupPeriod
397 end = last' "dynamics" (sort $ map snd $ elems m)
398 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
399 -- | decrease
400 then 2
401 else if ((fst prd) == (fst $ m ! n))
402 -- | recombination
403 then 0
404 else if isNew
405 -- | emergence
406 then 1
407 else 3
408 where
409 --------------------------------------
410 isNew :: Bool
411 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
412
413
414 processDynamics :: [PhyloGroup] -> [PhyloGroup]
415 processDynamics groups =
416 map (\g ->
417 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
418 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
419 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
420 where
421 --------------------------------------
422 mapNgrams :: Map Int (Date,Date)
423 mapNgrams = map (\dates ->
424 let dates' = sort dates
425 in (head' "dynamics" dates', last' "dynamics" dates'))
426 $ fromListWith (++)
427 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
428 $ (g ^. phylo_groupNgrams))) [] groups
429
430
431 ---------------------
432 -- | phyloExport | --
433 ---------------------
434
435
436 toPhyloExport :: Phylo -> DotGraph DotId
437 toPhyloExport phylo = exportToDot phylo
438 $ processFilters (exportFilter $ getConfig phylo)
439 $ processSort (exportSort $ getConfig phylo)
440 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
441 $ processMetrics export
442 where
443 export :: PhyloExport
444 export = PhyloExport groups branches
445 --------------------------------------
446 branches :: [PhyloBranch]
447 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
448 --------------------------------------
449 groups :: [PhyloGroup]
450 groups = processDynamics
451 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
452
453
454 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
455 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches