2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
38 import qualified Data.Text as Text
39 import qualified Data.Vector as Vector
40 import qualified Data.Text.Lazy as Lazy
41 import qualified Data.GraphViz.Attributes.HTML as H
47 dotToFile :: FilePath -> DotGraph DotId -> IO ()
48 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
50 dotToString :: DotGraph DotId -> [Char]
51 dotToString dotG = unpack (printDotGraph dotG)
53 dynamicToColor :: Double -> H.Attribute
55 | d == 0 = H.BGColor (toColor LightCoral)
56 | d == 1 = H.BGColor (toColor Khaki)
57 | d == 2 = H.BGColor (toColor SkyBlue)
58 | otherwise = H.Color (toColor Black)
60 pickLabelColor :: [Double] -> H.Attribute
62 | elem 0 lst = dynamicToColor 0
63 | elem 2 lst = dynamicToColor 2
64 | elem 1 lst = dynamicToColor 1
65 | otherwise = dynamicToColor 3
67 toDotLabel :: Text.Text -> Label
68 toDotLabel lbl = StrLabel $ fromStrict lbl
70 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
71 toAttr k v = customAttribute k v
73 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
74 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
76 groupIdToDotId :: PhyloGroupId -> DotId
77 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
79 branchIdToDotId :: PhyloBranchId -> DotId
80 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
82 periodIdToDotId :: PhyloPeriodId -> DotId
83 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
85 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
86 groupToTable fdt g = H.Table H.HTable
87 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
88 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
89 , H.tableRows = [header]
90 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
91 <> ( map ngramsToRow $ splitEvery 4
92 $ reverse $ sortOn (snd . snd)
93 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
94 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
96 --------------------------------------
97 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
98 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
99 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
100 --------------------------------------
103 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
104 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
105 <> (fromStrict " ( ")
106 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
107 <> (fromStrict " , ")
108 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
109 <> (fromStrict " ) "))]]
110 --------------------------------------
112 branchToDotNode :: PhyloBranch -> Dot DotId
114 node (branchIdToDotId $ b ^. branch_id)
115 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
116 <> (metaToAttr $ b ^. branch_meta)
117 <> [ toAttr "nodeType" "branch"
118 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ])
120 periodToDotNode :: (Date,Date) -> Dot DotId
121 periodToDotNode prd =
122 node (periodIdToDotId prd)
123 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
126 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
129 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
130 groupToDotNode fdt g =
131 node (groupIdToDotId $ getGroupId g)
132 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
133 <> [ toAttr "nodeType" "group"
134 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
135 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
136 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
137 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
140 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
141 toDotEdge source target lbl edgeType = edge source target
143 GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True
144 , Label (StrLabel $ fromStrict lbl)]
145 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
146 , Label (StrLabel $ fromStrict lbl)]
147 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
148 , Label (StrLabel $ fromStrict lbl)]
149 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
152 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
153 mergePointers groups =
154 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
155 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
156 in unionWith (\w w' -> max w w') toChilds toParents
159 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
160 exportToDot phylo export =
161 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
162 <> show(length $ export ^. export_groups) <> " groups "
163 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
164 <> "##########################") $
165 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
167 -- | 1) init the dot graph
168 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
169 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
171 , Style [SItem Filled []],Color [toWColor White]]
172 -- | home made attributes
173 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
174 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
175 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
176 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
177 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
181 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
183 -- | 2) create a layer for the branches labels
184 subgraph (Str "Branches peaks") $ do
186 graphAttrs [Rank SameRank]
188 -- | 3) group the branches by hierarchy
189 -- mapM (\branches ->
190 -- subgraph (Str "Branches clade") $ do
191 -- graphAttrs [Rank SameRank]
193 -- -- | 4) create a node for each branch
194 -- mapM branchToDotNode branches
195 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
197 mapM branchToDotNode $ export ^. export_branches
199 -- | 5) create a layer for each period
200 _ <- mapM (\period ->
201 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
202 graphAttrs [Rank SameRank]
203 periodToDotNode period
205 -- | 6) create a node for each group
206 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
207 ) $ getPeriodIds phylo
209 -- | 7) create the edges between a branch and its first groups
210 _ <- mapM (\(bId,groups) ->
211 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
214 $ map (\groups -> head' "toDot"
215 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
216 $ sortOn (fst . _phylo_groupPeriod) groups)
217 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
219 -- | 8) create the edges between the groups
220 _ <- mapM (\((k,k'),_) ->
221 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
222 ) $ (toList . mergePointers) $ export ^. export_groups
224 -- | 7) create the edges between the periods
225 _ <- mapM (\(prd,prd') ->
226 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
227 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
229 -- | 8) create the edges between the branches
230 _ <- mapM (\(bId,bId') ->
231 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
232 (Text.pack $ show(branchIdsToProximity bId bId'
233 (getThresholdInit $ phyloProximity $ getConfig phylo)
234 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
235 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
238 graphAttrs [Rank SameRank]
248 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
249 filterByBranchSize thr export =
250 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
251 in export & export_branches .~ (fst branches')
252 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
255 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
256 processFilters filters qua export =
257 foldl (\export' f -> case f of
258 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
259 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
260 else filterByBranchSize thr export'
267 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
268 sortByHierarchy depth branches =
269 if (length branches == 1)
273 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
274 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
275 ++ (sortByHierarchy (depth + 1) (snd partitions)))
276 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
277 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
280 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
281 sortByBirthDate order export =
282 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
283 branches' = case order of
285 Desc -> reverse branches
286 in export & export_branches .~ branches'
288 processSort :: Sort -> PhyloExport -> PhyloExport
289 processSort sort' export = case sort' of
290 ByBirthDate o -> sortByBirthDate o export
291 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
298 -- | Return the conditional probability of i knowing j
299 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
300 conditional m i j = (findWithDefault 0 (i,j) m)
304 -- | Return the genericity score of a given ngram
305 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
306 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
307 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
310 -- | Return the specificity score of a given ngram
311 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
312 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
313 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
316 -- | Return the inclusion score of a given ngram
317 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
318 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
319 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
322 ngramsMetrics :: PhyloExport -> PhyloExport
323 ngramsMetrics export =
326 (\g -> g & phylo_groupMeta %~ insert "genericity"
327 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
328 & phylo_groupMeta %~ insert "specificity"
329 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
330 & phylo_groupMeta %~ insert "inclusion"
331 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
335 branchDating :: PhyloExport -> PhyloExport
336 branchDating export =
337 over ( export_branches
340 let groups = sortOn fst
341 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
342 then acc ++ [g ^. phylo_groupPeriod]
343 else acc ) [] $ export ^. export_groups
345 birth = fst $ head' "birth" groups
346 age = (snd $ last' "age" groups) - birth
347 in b & branch_meta %~ insert "birth" [fromIntegral birth]
348 & branch_meta %~ insert "age" [fromIntegral age]
349 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
351 processMetrics :: PhyloExport -> PhyloExport
352 processMetrics export = ngramsMetrics
353 $ branchDating export
360 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
361 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
364 $ sortOn snd $ zip [0..] meta
367 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
368 mostInclusive nth foundations export =
369 over ( export_branches
372 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
373 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
374 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
375 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
376 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
377 in b & branch_label .~ lbl ) export
380 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
381 mostEmergentInclusive nth foundations export =
385 let lbl = ngramsToLabel foundations
387 $ map (\(_,(_,idx)) -> idx)
389 $ map (\groups -> sortOn (fst . snd) groups)
390 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
391 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
392 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
393 in g & phylo_groupLabel .~ lbl ) export
396 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
397 processLabels labels foundations export =
398 foldl (\export' label ->
400 GroupLabel tagger nth ->
402 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
403 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
404 BranchLabel tagger nth ->
406 MostInclusive -> mostInclusive nth foundations export'
407 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
415 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
416 toDynamics n parents group m =
417 let prd = group ^. phylo_groupPeriod
418 end = last' "dynamics" (sort $ map snd $ elems m)
419 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
422 else if ((fst prd) == (fst $ m ! n))
430 --------------------------------------
432 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
435 processDynamics :: [PhyloGroup] -> [PhyloGroup]
436 processDynamics groups =
438 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
439 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
440 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
442 --------------------------------------
443 mapNgrams :: Map Int (Date,Date)
444 mapNgrams = map (\dates ->
445 let dates' = sort dates
446 in (head' "dynamics" dates', last' "dynamics" dates'))
448 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
449 $ (g ^. phylo_groupNgrams))) [] groups
452 ---------------------
453 -- | phyloExport | --
454 ---------------------
457 toPhyloExport :: Phylo -> DotGraph DotId
458 toPhyloExport phylo = exportToDot phylo
459 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
460 $ processSort (exportSort $ getConfig phylo)
461 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
462 $ processMetrics export
464 export :: PhyloExport
465 export = PhyloExport groups branches
466 --------------------------------------
467 branches :: [PhyloBranch]
468 branches = map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
469 --------------------------------------
470 groups :: [PhyloGroup]
471 groups = traceExportGroups
473 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
474 $ tracePhyloInfo phylo
477 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
478 traceExportBranches branches = trace ("\n"
479 <> "-- | Export " <> show(length branches) <> " branches") branches
481 tracePhyloInfo :: Phylo -> Phylo
482 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
483 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
484 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
488 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
489 traceExportGroups groups = trace ("\n" <> "-- | Export "
490 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
491 <> show(length groups) <> " groups and "
492 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"