]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
add export logs
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
17
18 module Gargantext.Viz.Phylo.PhyloExport where
19
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
23
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
28
29 import Control.Lens
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
37
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
41
42 --------------------
43 -- | Dot export | --
44 --------------------
45
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
48
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
51
52 dynamicToColor :: Double -> H.Attribute
53 dynamicToColor d
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
58
59 pickLabelColor :: [Double] -> H.Attribute
60 pickLabelColor lst
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
65
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
68
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
71
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
74
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
77
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
80
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
83
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
94 where
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
100 header :: H.Row
101 header =
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
110
111 branchToDotNode :: PhyloBranch -> Dot DotId
112 branchToDotNode b =
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ])
118
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
126
127
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
136 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
137
138
139 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
140 toDotEdge source target lbl edgeType = edge source target
141 (case edgeType of
142 GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True
143 , Label (StrLabel $ fromStrict lbl)]
144 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
149
150
151 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
152 mergePointers groups =
153 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
154 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
155 in unionWith (\w w' -> max w w') toChilds toParents
156
157
158 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
159 exportToDot phylo export =
160 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
161 <> show(length $ export ^. export_groups) <> " groups "
162 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n") $
163 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
164
165 -- | 1) init the dot graph
166 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
167 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
168 , Ratio FillRatio
169 , Style [SItem Filled []],Color [toWColor White]]
170 -- | home made attributes
171 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
172 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
173 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
174 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
175 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
176 ])
177
178
179 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
180
181 -- | 2) create a layer for the branches labels
182 subgraph (Str "Branches peaks") $ do
183
184 graphAttrs [Rank SameRank]
185
186 -- | 3) group the branches by hierarchy
187 -- mapM (\branches ->
188 -- subgraph (Str "Branches clade") $ do
189 -- graphAttrs [Rank SameRank]
190
191 -- -- | 4) create a node for each branch
192 -- mapM branchToDotNode branches
193 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
194
195 mapM branchToDotNode $ export ^. export_branches
196
197 -- | 5) create a layer for each period
198 _ <- mapM (\period ->
199 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
200 graphAttrs [Rank SameRank]
201 periodToDotNode period
202
203 -- | 6) create a node for each group
204 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
205 ) $ getPeriodIds phylo
206
207 -- | 7) create the edges between a branch and its first groups
208 _ <- mapM (\(bId,groups) ->
209 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
210 )
211 $ toList
212 $ map (\groups -> head' "toDot"
213 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
214 $ sortOn (fst . _phylo_groupPeriod) groups)
215 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
216
217 -- | 8) create the edges between the groups
218 _ <- mapM (\((k,k'),_) ->
219 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
220 ) $ (toList . mergePointers) $ export ^. export_groups
221
222 -- | 7) create the edges between the periods
223 _ <- mapM (\(prd,prd') ->
224 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
225 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
226
227 -- | 8) create the edges between the branches
228 _ <- mapM (\(bId,bId') ->
229 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
230 (Text.pack $ show(branchIdsToProximity bId bId'
231 (getThresholdInit $ phyloProximity $ getConfig phylo)
232 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
233 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
234
235
236 graphAttrs [Rank SameRank]
237
238
239
240
241
242 ----------------
243 -- | Filter | --
244 ----------------
245
246 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
247 filterByBranchSize thr export =
248 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
249 in export & export_branches .~ (fst branches')
250 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
251
252
253 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
254 processFilters filters qua export =
255 foldl (\export' f -> case f of
256 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
257 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
258 else filterByBranchSize thr export'
259 ) export filters
260
261 --------------
262 -- | Sort | --
263 --------------
264
265 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
266 sortByHierarchy depth branches =
267 if (length branches == 1)
268 then branches
269 else concat
270 $ map (\branches' ->
271 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
272 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
273 ++ (sortByHierarchy (depth + 1) (snd partitions)))
274 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
275 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
276
277
278 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
279 sortByBirthDate order export =
280 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
281 branches' = case order of
282 Asc -> branches
283 Desc -> reverse branches
284 in export & export_branches .~ branches'
285
286 processSort :: Sort -> PhyloExport -> PhyloExport
287 processSort sort' export = case sort' of
288 ByBirthDate o -> sortByBirthDate o export
289 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
290
291
292 -----------------
293 -- | Metrics | --
294 -----------------
295
296 -- | Return the conditional probability of i knowing j
297 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
298 conditional m i j = (findWithDefault 0 (i,j) m)
299 / (m ! (j,j))
300
301
302 -- | Return the genericity score of a given ngram
303 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
304 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
305 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
306
307
308 -- | Return the specificity score of a given ngram
309 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
310 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
311 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
312
313
314 -- | Return the inclusion score of a given ngram
315 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
316 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
317 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
318
319
320 ngramsMetrics :: PhyloExport -> PhyloExport
321 ngramsMetrics export =
322 over ( export_groups
323 . traverse )
324 (\g -> g & phylo_groupMeta %~ insert "genericity"
325 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
326 & phylo_groupMeta %~ insert "specificity"
327 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
328 & phylo_groupMeta %~ insert "inclusion"
329 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
330 ) export
331
332
333 branchDating :: PhyloExport -> PhyloExport
334 branchDating export =
335 over ( export_branches
336 . traverse )
337 (\b ->
338 let groups = sortOn fst
339 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
340 then acc ++ [g ^. phylo_groupPeriod]
341 else acc ) [] $ export ^. export_groups
342 periods = nub groups
343 birth = fst $ head' "birth" groups
344 age = (snd $ last' "age" groups) - birth
345 in b & branch_meta %~ insert "birth" [fromIntegral birth]
346 & branch_meta %~ insert "age" [fromIntegral age]
347 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
348
349 processMetrics :: PhyloExport -> PhyloExport
350 processMetrics export = ngramsMetrics
351 $ branchDating export
352
353
354 -----------------
355 -- | Taggers | --
356 -----------------
357
358 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
359 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
360 $ take nth
361 $ reverse
362 $ sortOn snd $ zip [0..] meta
363
364
365 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
366 mostInclusive nth foundations export =
367 over ( export_branches
368 . traverse )
369 (\b ->
370 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
371 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
372 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
373 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
374 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
375 in b & branch_label .~ lbl ) export
376
377
378 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
379 mostEmergentInclusive nth foundations export =
380 over ( export_groups
381 . traverse )
382 (\g ->
383 let lbl = ngramsToLabel foundations
384 $ take nth
385 $ map (\(_,(_,idx)) -> idx)
386 $ concat
387 $ map (\groups -> sortOn (fst . snd) groups)
388 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
389 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
390 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
391 in g & phylo_groupLabel .~ lbl ) export
392
393
394 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
395 processLabels labels foundations export =
396 foldl (\export' label ->
397 case label of
398 GroupLabel tagger nth ->
399 case tagger of
400 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
401 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
402 BranchLabel tagger nth ->
403 case tagger of
404 MostInclusive -> mostInclusive nth foundations export'
405 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
406
407
408 ------------------
409 -- | Dynamics | --
410 ------------------
411
412
413 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
414 toDynamics n parents group m =
415 let prd = group ^. phylo_groupPeriod
416 end = last' "dynamics" (sort $ map snd $ elems m)
417 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
418 -- | decrease
419 then 2
420 else if ((fst prd) == (fst $ m ! n))
421 -- | recombination
422 then 0
423 else if isNew
424 -- | emergence
425 then 1
426 else 3
427 where
428 --------------------------------------
429 isNew :: Bool
430 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
431
432
433 processDynamics :: [PhyloGroup] -> [PhyloGroup]
434 processDynamics groups =
435 map (\g ->
436 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
437 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
438 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
439 where
440 --------------------------------------
441 mapNgrams :: Map Int (Date,Date)
442 mapNgrams = map (\dates ->
443 let dates' = sort dates
444 in (head' "dynamics" dates', last' "dynamics" dates'))
445 $ fromListWith (++)
446 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
447 $ (g ^. phylo_groupNgrams))) [] groups
448
449
450 ---------------------
451 -- | phyloExport | --
452 ---------------------
453
454
455 toPhyloExport :: Phylo -> DotGraph DotId
456 toPhyloExport phylo = exportToDot phylo
457 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
458 $ processSort (exportSort $ getConfig phylo)
459 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
460 $ processMetrics export
461 where
462 export :: PhyloExport
463 export = PhyloExport groups branches
464 --------------------------------------
465 branches :: [PhyloBranch]
466 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
467 --------------------------------------
468 groups :: [PhyloGroup]
469 groups = traceExportGroups
470 $ processDynamics
471 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
472
473
474 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
475 traceExportBranches branches = trace ("\n"
476 <> "-- | Export " <> show(length branches) <> " branches") branches
477
478 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
479 traceExportGroups groups = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Export " <> show(length groups) <> " groups and "
480 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
481 ) groups
482