]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
end of phylo clustering
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
17
18 module Gargantext.Viz.Phylo.PhyloExport where
19
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Text (Text)
23 import Data.Vector (Vector)
24
25 import Prelude (writeFile)
26 import Gargantext.Prelude
27 import Gargantext.Viz.AdaptativePhylo
28 import Gargantext.Viz.Phylo.PhyloTools
29
30 import Control.Lens
31 import Data.GraphViz hiding (DotGraph, Order)
32 import Data.GraphViz.Types.Generalised (DotGraph)
33 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
34 import Data.GraphViz.Types.Monadic
35 import Data.Text.Lazy (fromStrict, pack, unpack)
36 import System.FilePath
37 import Debug.Trace (trace)
38
39 import qualified Data.Text as Text
40 import qualified Data.Text.Lazy as Lazy
41 import qualified Data.GraphViz.Attributes.HTML as H
42
43 --------------------
44 -- | Dot export | --
45 --------------------
46
47 dotToFile :: FilePath -> DotGraph DotId -> IO ()
48 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
49
50 dotToString :: DotGraph DotId -> [Char]
51 dotToString dotG = unpack (printDotGraph dotG)
52
53 dynamicToColor :: Double -> H.Attribute
54 dynamicToColor d
55 | d == 0 = H.BGColor (toColor LightCoral)
56 | d == 1 = H.BGColor (toColor Khaki)
57 | d == 2 = H.BGColor (toColor SkyBlue)
58 | otherwise = H.Color (toColor Black)
59
60 pickLabelColor :: [Double] -> H.Attribute
61 pickLabelColor lst
62 | elem 0 lst = dynamicToColor 0
63 | elem 2 lst = dynamicToColor 2
64 | elem 1 lst = dynamicToColor 1
65 | otherwise = dynamicToColor 3
66
67 toDotLabel :: Text.Text -> Label
68 toDotLabel lbl = StrLabel $ fromStrict lbl
69
70 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
71 toAttr k v = customAttribute k v
72
73 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
74 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
75
76 groupIdToDotId :: PhyloGroupId -> DotId
77 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
78
79 branchIdToDotId :: PhyloBranchId -> DotId
80 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
81
82 periodIdToDotId :: PhyloPeriodId -> DotId
83 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
84
85 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
86 groupToTable fdt g = H.Table H.HTable
87 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
88 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
89 , H.tableRows = [header]
90 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
91 <> ( map ngramsToRow $ splitEvery 4
92 $ reverse $ sortOn (snd . snd)
93 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
94 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
95 where
96 --------------------------------------
97 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
98 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
99 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
100 --------------------------------------
101 header :: H.Row
102 header =
103 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
104 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
105 <> (fromStrict " ( ")
106 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
107 <> (fromStrict " , ")
108 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
109 <> (fromStrict " ) "))]]
110 --------------------------------------
111
112 branchToDotNode :: PhyloBranch -> Dot DotId
113 branchToDotNode b =
114 node (branchIdToDotId $ b ^. branch_id)
115 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
116 <> (metaToAttr $ b ^. branch_meta)
117 <> [ toAttr "nodeType" "branch"
118 , toAttr "branchId" (pack $ show (snd $ b ^. branch_id)) ])
119
120 periodToDotNode :: (Date,Date) -> Dot DotId
121 periodToDotNode prd =
122 node (periodIdToDotId prd)
123 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
126 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
127
128
129 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
130 groupToDotNode fdt g =
131 node (groupIdToDotId $ getGroupId g)
132 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
133 <> [ toAttr "nodeType" "group"
134 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
135 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
136 , toAttr "branchId" (pack $ show (snd $ g ^. phylo_groupBranchId))])
137
138
139 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
140 toDotEdge from to lbl edgeType = edge from to
141 (case edgeType of
142 GroupToGroup -> [ Width 2, Color [toWColor Black], Constraint True
143 , Label (StrLabel $ fromStrict lbl)]
144 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
149
150
151 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
152 mergePointers groups =
153 let toChilds = fromList $ concat $ map (\g -> map (\(to,w) -> ((getGroupId g,to),w)) $ g ^. phylo_groupPeriodChilds) groups
154 toParents = fromList $ concat $ map (\g -> map (\(to,w) -> ((to,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
155 in unionWith (\w w' -> max w w') toChilds toParents
156
157
158 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
159 exportToDot phylo export =
160 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
161
162 -- | 1) init the dot graph
163 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
164 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
165 , Ratio FillRatio
166 , Style [SItem Filled []],Color [toWColor White]])
167
168 -- | 2) create a layer for the branches labels
169 subgraph (Str "Branches peaks") $ do
170
171 graphAttrs [Rank SameRank]
172
173 -- | 3) group the branches by hierarchy
174 mapM (\branches ->
175 subgraph (Str "Branches clade") $ do
176 graphAttrs [Rank SameRank]
177
178 -- | 4) create a node for each branch
179 mapM branchToDotNode branches
180 ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
181
182 -- | 5) create a layer for each period
183 mapM (\period ->
184 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
185 graphAttrs [Rank SameRank]
186 periodToDotNode period
187
188 -- | 6) create a node for each group
189 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
190 ) $ getPeriodIds phylo
191
192 -- | 7) create the edges between a branch and its first groups
193 mapM (\(bId,groups) ->
194 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
195 )
196 $ toList
197 $ map (\groups -> head' "toDot"
198 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
199 $ sortOn (fst . _phylo_groupPeriod) groups)
200 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
201
202 -- | 8) create the edges between the groups
203 mapM (\((k,k'),w) ->
204 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
205 ) $ (toList . mergePointers) $ export ^. export_groups
206
207 -- | 7) create the edges between the periods
208 mapM (\(prd,prd') ->
209 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
210 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
211
212 -- | 8) create the edges between the branches
213 mapM (\(bId,bId') ->
214 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
215 (Text.pack $ show(branchIdsToProximity bId bId'
216 (getThresholdInit $ phyloProximity $ getConfig phylo)
217 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
218 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
219
220
221 graphAttrs [Rank SameRank]
222
223
224
225
226
227 ----------------
228 -- | Filter | --
229 ----------------
230
231 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
232 filterByBranchSize thr export =
233 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
234 in export & export_branches .~ (fst branches')
235 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
236
237
238 processFilters :: [Filter] -> PhyloExport -> PhyloExport
239 processFilters filters export =
240 foldl (\export' f -> case f of
241 ByBranchSize thr -> filterByBranchSize thr export'
242 _ -> export'
243 ) export filters
244
245 --------------
246 -- | Sort | --
247 --------------
248
249 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
250 sortByHierarchy depth branches =
251 if (length branches == 1)
252 then branches
253 else concat
254 $ map (\branches' ->
255 let parts = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
256 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst parts))
257 ++ (sortByHierarchy (depth + 1) (snd parts)))
258 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
259 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
260
261
262 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
263 sortByBirthDate order export =
264 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
265 branches' = case order of
266 Asc -> branches
267 Desc -> reverse branches
268 in export & export_branches .~ branches'
269
270 processSort :: Sort -> PhyloExport -> PhyloExport
271 processSort sort' export = case sort' of
272 ByBirthDate o -> sortByBirthDate o export
273 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
274
275
276 -----------------
277 -- | Metrics | --
278 -----------------
279
280 -- | Return the conditional probability of i knowing j
281 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
282 conditional m i j = (findWithDefault 0 (i,j) m)
283 / (m ! (j,j))
284
285
286 -- | Return the genericity score of a given ngram
287 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
288 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
289 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
290
291
292 -- | Return the specificity score of a given ngram
293 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
294 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
295 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
296
297
298 -- | Return the inclusion score of a given ngram
299 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
300 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
301 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
302
303
304 ngramsMetrics :: PhyloExport -> PhyloExport
305 ngramsMetrics export =
306 over ( export_groups
307 . traverse )
308 (\g -> g & phylo_groupMeta %~ insert "genericity"
309 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
310 & phylo_groupMeta %~ insert "specificity"
311 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
312 & phylo_groupMeta %~ insert "inclusion"
313 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
314 ) export
315
316
317 branchDating :: PhyloExport -> PhyloExport
318 branchDating export =
319 over ( export_branches
320 . traverse )
321 (\b ->
322 let groups = sortOn fst
323 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
324 then acc ++ [g ^. phylo_groupPeriod]
325 else acc ) [] $ export ^. export_groups
326 birth = fst $ head' "birth" groups
327 age = (snd $ last' "age" groups) - birth
328 in b & branch_meta %~ insert "birth" [fromIntegral birth]
329 & branch_meta %~ insert "age" [fromIntegral age]
330 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
331
332 processMetrics :: PhyloExport -> PhyloExport
333 processMetrics export = ngramsMetrics
334 $ branchDating export
335
336
337 -----------------
338 -- | Taggers | --
339 -----------------
340
341 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
342 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
343 $ take nth
344 $ reverse
345 $ sortOn snd $ zip [0..] meta
346
347
348 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
349 mostInclusive nth foundations export =
350 over ( export_branches
351 . traverse )
352 (\b ->
353 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
354 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
355 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
356 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
357 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
358 in b & branch_label .~ lbl ) export
359
360
361 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
362 mostEmergentInclusive nth foundations export =
363 over ( export_groups
364 . traverse )
365 (\g ->
366 let lbl = ngramsToLabel foundations
367 $ take nth
368 $ map (\(_,(_,idx)) -> idx)
369 $ concat
370 $ map (\groups -> sortOn (fst . snd) groups)
371 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
372 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
373 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
374 in g & phylo_groupLabel .~ lbl ) export
375
376
377 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
378 processLabels labels foundations export =
379 foldl (\export' label ->
380 case label of
381 GroupLabel tagger nth ->
382 case tagger of
383 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
384 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
385 BranchLabel tagger nth ->
386 case tagger of
387 MostInclusive -> mostInclusive nth foundations export'
388 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
389
390
391 ------------------
392 -- | Dynamics | --
393 ------------------
394
395
396 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
397 toDynamics n parents group m =
398 let prd = group ^. phylo_groupPeriod
399 bid = group ^. phylo_groupBranchId
400 end = last' "dynamics" (sort $ map snd $ elems m)
401 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
402 -- | decrease
403 then 2
404 else if ((fst prd) == (fst $ m ! n))
405 -- | recombination
406 then 0
407 else if isNew
408 -- | emergence
409 then 1
410 else 3
411 where
412 --------------------------------------
413 isNew :: Bool
414 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
415
416
417 processDynamics :: [PhyloGroup] -> [PhyloGroup]
418 processDynamics groups =
419 map (\g ->
420 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
421 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
422 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
423 where
424 --------------------------------------
425 mapNgrams :: Map Int (Date,Date)
426 mapNgrams = map (\dates ->
427 let dates' = sort dates
428 in (head' "dynamics" dates', last' "dynamics" dates'))
429 $ fromListWith (++)
430 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
431 $ (g ^. phylo_groupNgrams))) [] groups
432
433
434 ---------------------
435 -- | phyloExport | --
436 ---------------------
437
438
439 toPhyloExport :: Phylo -> DotGraph DotId
440 toPhyloExport phylo = exportToDot phylo
441 $ processFilters (exportFilter $ getConfig phylo)
442 $ processSort (exportSort $ getConfig phylo)
443 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
444 $ processMetrics export
445 where
446 export :: PhyloExport
447 export = PhyloExport groups branches
448 --------------------------------------
449 branches :: [PhyloBranch]
450 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
451 --------------------------------------
452 groups :: [PhyloGroup]
453 groups = processDynamics
454 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
455
456
457 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
458 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches