2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
52 dynamicToColor :: Double -> H.Attribute
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
59 pickLabelColor :: [Double] -> H.Attribute
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
111 branchToDotNode :: PhyloBranch -> Dot DotId
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ])
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
136 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
139 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
140 toDotEdge source target lbl edgeType = edge source target
142 GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True
143 , Label (StrLabel $ fromStrict lbl)]
144 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
151 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
152 mergePointers groups =
153 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
154 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
155 in unionWith (\w w' -> max w w') toChilds toParents
158 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
159 exportToDot phylo export =
160 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
161 <> show(length $ export ^. export_groups) <> " groups to a dot file\n") $
162 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
164 -- | 1) init the dot graph
165 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
166 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
168 , Style [SItem Filled []],Color [toWColor White]]
169 -- | home made attributes
170 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
171 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
172 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
173 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
174 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo))
178 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
180 -- | 2) create a layer for the branches labels
181 subgraph (Str "Branches peaks") $ do
183 graphAttrs [Rank SameRank]
185 -- | 3) group the branches by hierarchy
186 -- mapM (\branches ->
187 -- subgraph (Str "Branches clade") $ do
188 -- graphAttrs [Rank SameRank]
190 -- -- | 4) create a node for each branch
191 -- mapM branchToDotNode branches
192 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
194 mapM branchToDotNode $ export ^. export_branches
196 -- | 5) create a layer for each period
197 _ <- mapM (\period ->
198 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
199 graphAttrs [Rank SameRank]
200 periodToDotNode period
202 -- | 6) create a node for each group
203 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
204 ) $ getPeriodIds phylo
206 -- | 7) create the edges between a branch and its first groups
207 _ <- mapM (\(bId,groups) ->
208 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
211 $ map (\groups -> head' "toDot"
212 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
213 $ sortOn (fst . _phylo_groupPeriod) groups)
214 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
216 -- | 8) create the edges between the groups
217 _ <- mapM (\((k,k'),_) ->
218 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
219 ) $ (toList . mergePointers) $ export ^. export_groups
221 -- | 7) create the edges between the periods
222 _ <- mapM (\(prd,prd') ->
223 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
224 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
226 -- | 8) create the edges between the branches
227 _ <- mapM (\(bId,bId') ->
228 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
229 (Text.pack $ show(branchIdsToProximity bId bId'
230 (getThresholdInit $ phyloProximity $ getConfig phylo)
231 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
232 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
235 graphAttrs [Rank SameRank]
245 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
246 filterByBranchSize thr export =
247 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
248 in export & export_branches .~ (fst branches')
249 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
252 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
253 processFilters filters qua export =
254 foldl (\export' f -> case f of
255 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
256 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
257 else filterByBranchSize thr export'
264 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
265 sortByHierarchy depth branches =
266 if (length branches == 1)
270 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
271 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
272 ++ (sortByHierarchy (depth + 1) (snd partitions)))
273 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
274 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
277 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
278 sortByBirthDate order export =
279 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
280 branches' = case order of
282 Desc -> reverse branches
283 in export & export_branches .~ branches'
285 processSort :: Sort -> PhyloExport -> PhyloExport
286 processSort sort' export = case sort' of
287 ByBirthDate o -> sortByBirthDate o export
288 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
295 -- | Return the conditional probability of i knowing j
296 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
297 conditional m i j = (findWithDefault 0 (i,j) m)
301 -- | Return the genericity score of a given ngram
302 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
303 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
304 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
307 -- | Return the specificity score of a given ngram
308 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
309 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
310 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
313 -- | Return the inclusion score of a given ngram
314 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
315 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
316 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
319 ngramsMetrics :: PhyloExport -> PhyloExport
320 ngramsMetrics export =
323 (\g -> g & phylo_groupMeta %~ insert "genericity"
324 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
325 & phylo_groupMeta %~ insert "specificity"
326 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
327 & phylo_groupMeta %~ insert "inclusion"
328 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
332 branchDating :: PhyloExport -> PhyloExport
333 branchDating export =
334 over ( export_branches
337 let groups = sortOn fst
338 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
339 then acc ++ [g ^. phylo_groupPeriod]
340 else acc ) [] $ export ^. export_groups
341 birth = fst $ head' "birth" groups
342 age = (snd $ last' "age" groups) - birth
343 in b & branch_meta %~ insert "birth" [fromIntegral birth]
344 & branch_meta %~ insert "age" [fromIntegral age]
345 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
347 processMetrics :: PhyloExport -> PhyloExport
348 processMetrics export = ngramsMetrics
349 $ branchDating export
356 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
357 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
360 $ sortOn snd $ zip [0..] meta
363 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
364 mostInclusive nth foundations export =
365 over ( export_branches
368 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
369 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
370 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
371 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
372 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
373 in b & branch_label .~ lbl ) export
376 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
377 mostEmergentInclusive nth foundations export =
381 let lbl = ngramsToLabel foundations
383 $ map (\(_,(_,idx)) -> idx)
385 $ map (\groups -> sortOn (fst . snd) groups)
386 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
387 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
388 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
389 in g & phylo_groupLabel .~ lbl ) export
392 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
393 processLabels labels foundations export =
394 foldl (\export' label ->
396 GroupLabel tagger nth ->
398 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
399 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
400 BranchLabel tagger nth ->
402 MostInclusive -> mostInclusive nth foundations export'
403 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
411 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
412 toDynamics n parents group m =
413 let prd = group ^. phylo_groupPeriod
414 end = last' "dynamics" (sort $ map snd $ elems m)
415 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
418 else if ((fst prd) == (fst $ m ! n))
426 --------------------------------------
428 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
431 processDynamics :: [PhyloGroup] -> [PhyloGroup]
432 processDynamics groups =
434 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
435 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
436 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
438 --------------------------------------
439 mapNgrams :: Map Int (Date,Date)
440 mapNgrams = map (\dates ->
441 let dates' = sort dates
442 in (head' "dynamics" dates', last' "dynamics" dates'))
444 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
445 $ (g ^. phylo_groupNgrams))) [] groups
448 ---------------------
449 -- | phyloExport | --
450 ---------------------
453 toPhyloExport :: Phylo -> DotGraph DotId
454 toPhyloExport phylo = exportToDot phylo
455 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
456 $ processSort (exportSort $ getConfig phylo)
457 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
458 $ processMetrics export
460 export :: PhyloExport
461 export = PhyloExport groups branches
462 --------------------------------------
463 branches :: [PhyloBranch]
464 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
465 --------------------------------------
466 groups :: [PhyloGroup]
467 groups = processDynamics
468 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
471 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
472 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches