]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
Merge branch 'dev-corpus-add-file' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell...
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
17
18 module Gargantext.Viz.Phylo.PhyloExport where
19
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
23
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
28
29 import Control.Lens
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
37
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
41
42 --------------------
43 -- | Dot export | --
44 --------------------
45
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
48
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
51
52 dynamicToColor :: Double -> H.Attribute
53 dynamicToColor d
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
58
59 pickLabelColor :: [Double] -> H.Attribute
60 pickLabelColor lst
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
65
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
68
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
71
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
74
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
77
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
80
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
83
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
94 where
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
100 header :: H.Row
101 header =
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
110
111 branchToDotNode :: PhyloBranch -> Dot DotId
112 branchToDotNode b =
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ])
118
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
126
127
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
136 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
137
138
139 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
140 toDotEdge source target lbl edgeType = edge source target
141 (case edgeType of
142 GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True
143 , Label (StrLabel $ fromStrict lbl)]
144 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
149
150
151 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
152 mergePointers groups =
153 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
154 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
155 in unionWith (\w w' -> max w w') toChilds toParents
156
157
158 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
159 exportToDot phylo export =
160 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
161 <> show(length $ export ^. export_groups) <> " groups to a dot file\n") $
162 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
163
164 -- | 1) init the dot graph
165 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
166 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
167 , Ratio FillRatio
168 , Style [SItem Filled []],Color [toWColor White]]
169 -- | home made attributes
170 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
171 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
172 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
173 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
174 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo))
175 ])
176
177
178 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
179
180 -- | 2) create a layer for the branches labels
181 subgraph (Str "Branches peaks") $ do
182
183 graphAttrs [Rank SameRank]
184
185 -- | 3) group the branches by hierarchy
186 -- mapM (\branches ->
187 -- subgraph (Str "Branches clade") $ do
188 -- graphAttrs [Rank SameRank]
189
190 -- -- | 4) create a node for each branch
191 -- mapM branchToDotNode branches
192 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
193
194 mapM branchToDotNode $ export ^. export_branches
195
196 -- | 5) create a layer for each period
197 _ <- mapM (\period ->
198 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
199 graphAttrs [Rank SameRank]
200 periodToDotNode period
201
202 -- | 6) create a node for each group
203 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
204 ) $ getPeriodIds phylo
205
206 -- | 7) create the edges between a branch and its first groups
207 _ <- mapM (\(bId,groups) ->
208 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
209 )
210 $ toList
211 $ map (\groups -> head' "toDot"
212 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
213 $ sortOn (fst . _phylo_groupPeriod) groups)
214 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
215
216 -- | 8) create the edges between the groups
217 _ <- mapM (\((k,k'),_) ->
218 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
219 ) $ (toList . mergePointers) $ export ^. export_groups
220
221 -- | 7) create the edges between the periods
222 _ <- mapM (\(prd,prd') ->
223 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
224 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
225
226 -- | 8) create the edges between the branches
227 _ <- mapM (\(bId,bId') ->
228 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
229 (Text.pack $ show(branchIdsToProximity bId bId'
230 (getThresholdInit $ phyloProximity $ getConfig phylo)
231 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
232 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
233
234
235 graphAttrs [Rank SameRank]
236
237
238
239
240
241 ----------------
242 -- | Filter | --
243 ----------------
244
245 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
246 filterByBranchSize thr export =
247 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
248 in export & export_branches .~ (fst branches')
249 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
250
251
252 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
253 processFilters filters qua export =
254 foldl (\export' f -> case f of
255 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
256 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
257 else filterByBranchSize thr export'
258 ) export filters
259
260 --------------
261 -- | Sort | --
262 --------------
263
264 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
265 sortByHierarchy depth branches =
266 if (length branches == 1)
267 then branches
268 else concat
269 $ map (\branches' ->
270 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
271 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
272 ++ (sortByHierarchy (depth + 1) (snd partitions)))
273 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
274 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
275
276
277 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
278 sortByBirthDate order export =
279 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
280 branches' = case order of
281 Asc -> branches
282 Desc -> reverse branches
283 in export & export_branches .~ branches'
284
285 processSort :: Sort -> PhyloExport -> PhyloExport
286 processSort sort' export = case sort' of
287 ByBirthDate o -> sortByBirthDate o export
288 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
289
290
291 -----------------
292 -- | Metrics | --
293 -----------------
294
295 -- | Return the conditional probability of i knowing j
296 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
297 conditional m i j = (findWithDefault 0 (i,j) m)
298 / (m ! (j,j))
299
300
301 -- | Return the genericity score of a given ngram
302 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
303 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
304 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
305
306
307 -- | Return the specificity score of a given ngram
308 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
309 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
310 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
311
312
313 -- | Return the inclusion score of a given ngram
314 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
315 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
316 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
317
318
319 ngramsMetrics :: PhyloExport -> PhyloExport
320 ngramsMetrics export =
321 over ( export_groups
322 . traverse )
323 (\g -> g & phylo_groupMeta %~ insert "genericity"
324 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
325 & phylo_groupMeta %~ insert "specificity"
326 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
327 & phylo_groupMeta %~ insert "inclusion"
328 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
329 ) export
330
331
332 branchDating :: PhyloExport -> PhyloExport
333 branchDating export =
334 over ( export_branches
335 . traverse )
336 (\b ->
337 let groups = sortOn fst
338 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
339 then acc ++ [g ^. phylo_groupPeriod]
340 else acc ) [] $ export ^. export_groups
341 birth = fst $ head' "birth" groups
342 age = (snd $ last' "age" groups) - birth
343 in b & branch_meta %~ insert "birth" [fromIntegral birth]
344 & branch_meta %~ insert "age" [fromIntegral age]
345 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
346
347 processMetrics :: PhyloExport -> PhyloExport
348 processMetrics export = ngramsMetrics
349 $ branchDating export
350
351
352 -----------------
353 -- | Taggers | --
354 -----------------
355
356 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
357 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
358 $ take nth
359 $ reverse
360 $ sortOn snd $ zip [0..] meta
361
362
363 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
364 mostInclusive nth foundations export =
365 over ( export_branches
366 . traverse )
367 (\b ->
368 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
369 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
370 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
371 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
372 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
373 in b & branch_label .~ lbl ) export
374
375
376 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
377 mostEmergentInclusive nth foundations export =
378 over ( export_groups
379 . traverse )
380 (\g ->
381 let lbl = ngramsToLabel foundations
382 $ take nth
383 $ map (\(_,(_,idx)) -> idx)
384 $ concat
385 $ map (\groups -> sortOn (fst . snd) groups)
386 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
387 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
388 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
389 in g & phylo_groupLabel .~ lbl ) export
390
391
392 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
393 processLabels labels foundations export =
394 foldl (\export' label ->
395 case label of
396 GroupLabel tagger nth ->
397 case tagger of
398 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
399 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
400 BranchLabel tagger nth ->
401 case tagger of
402 MostInclusive -> mostInclusive nth foundations export'
403 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
404
405
406 ------------------
407 -- | Dynamics | --
408 ------------------
409
410
411 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
412 toDynamics n parents group m =
413 let prd = group ^. phylo_groupPeriod
414 end = last' "dynamics" (sort $ map snd $ elems m)
415 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
416 -- | decrease
417 then 2
418 else if ((fst prd) == (fst $ m ! n))
419 -- | recombination
420 then 0
421 else if isNew
422 -- | emergence
423 then 1
424 else 3
425 where
426 --------------------------------------
427 isNew :: Bool
428 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
429
430
431 processDynamics :: [PhyloGroup] -> [PhyloGroup]
432 processDynamics groups =
433 map (\g ->
434 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
435 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
436 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
437 where
438 --------------------------------------
439 mapNgrams :: Map Int (Date,Date)
440 mapNgrams = map (\dates ->
441 let dates' = sort dates
442 in (head' "dynamics" dates', last' "dynamics" dates'))
443 $ fromListWith (++)
444 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
445 $ (g ^. phylo_groupNgrams))) [] groups
446
447
448 ---------------------
449 -- | phyloExport | --
450 ---------------------
451
452
453 toPhyloExport :: Phylo -> DotGraph DotId
454 toPhyloExport phylo = exportToDot phylo
455 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
456 $ processSort (exportSort $ getConfig phylo)
457 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
458 $ processMetrics export
459 where
460 export :: PhyloExport
461 export = PhyloExport groups branches
462 --------------------------------------
463 branches :: [PhyloBranch]
464 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
465 --------------------------------------
466 groups :: [PhyloGroup]
467 groups = processDynamics
468 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
469
470
471 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
472 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches