2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
18 module Gargantext.Viz.Phylo.PhyloExport where
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
38 import qualified Data.Text as Text
39 import qualified Data.Vector as Vector
40 import qualified Data.Text.Lazy as Lazy
41 import qualified Data.GraphViz.Attributes.HTML as H
47 dotToFile :: FilePath -> DotGraph DotId -> IO ()
48 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
50 dotToString :: DotGraph DotId -> [Char]
51 dotToString dotG = unpack (printDotGraph dotG)
53 dynamicToColor :: Double -> H.Attribute
55 | d == 0 = H.BGColor (toColor LightCoral)
56 | d == 1 = H.BGColor (toColor Khaki)
57 | d == 2 = H.BGColor (toColor SkyBlue)
58 | otherwise = H.Color (toColor Black)
60 pickLabelColor :: [Double] -> H.Attribute
62 | elem 0 lst = dynamicToColor 0
63 | elem 2 lst = dynamicToColor 2
64 | elem 1 lst = dynamicToColor 1
65 | otherwise = dynamicToColor 3
67 toDotLabel :: Text.Text -> Label
68 toDotLabel lbl = StrLabel $ fromStrict lbl
70 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
71 toAttr k v = customAttribute k v
73 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
74 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
76 groupIdToDotId :: PhyloGroupId -> DotId
77 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
79 branchIdToDotId :: PhyloBranchId -> DotId
80 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
82 periodIdToDotId :: PhyloPeriodId -> DotId
83 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
85 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
86 groupToTable fdt g = H.Table H.HTable
87 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
88 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
89 , H.tableRows = [header]
90 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
91 <> ( map ngramsToRow $ splitEvery 4
92 $ reverse $ sortOn (snd . snd)
93 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
94 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
96 --------------------------------------
97 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
98 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
99 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
100 --------------------------------------
103 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
104 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
105 <> (fromStrict " ( ")
106 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
107 <> (fromStrict " , ")
108 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
109 <> (fromStrict " ) ")
110 <> (pack $ show (getGroupId g)))]]
111 --------------------------------------
113 branchToDotNode :: PhyloBranch -> Dot DotId
115 node (branchIdToDotId $ b ^. branch_id)
116 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
117 <> (metaToAttr $ b ^. branch_meta)
118 <> [ toAttr "nodeType" "branch"
119 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id)) ])
121 periodToDotNode :: (Date,Date) -> Dot DotId
122 periodToDotNode prd =
123 node (periodIdToDotId prd)
124 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
125 <> [ toAttr "nodeType" "period"
126 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
127 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
131 groupToDotNode fdt g =
132 node (groupIdToDotId $ getGroupId g)
133 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
134 <> [ toAttr "nodeType" "group"
135 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
136 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
137 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
138 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
141 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
142 toDotEdge source target lbl edgeType = edge source target
144 GroupToGroup -> [ Width 10, Color [toWColor Black], Constraint True
145 , Label (StrLabel $ fromStrict lbl)]
146 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
147 , Label (StrLabel $ fromStrict lbl)]
148 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
149 , Label (StrLabel $ fromStrict lbl)]
150 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
153 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
154 mergePointers groups =
155 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
156 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
157 in unionWith (\w w' -> max w w') toChilds toParents
160 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
161 exportToDot phylo export =
162 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
163 <> show(length $ export ^. export_groups) <> " groups "
164 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
165 <> "##########################") $
166 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
168 -- | 1) init the dot graph
169 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
170 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
172 , Style [SItem Filled []],Color [toWColor White]]
173 -- | home made attributes
174 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
175 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
176 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
177 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
178 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
182 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
184 -- | 2) create a layer for the branches labels
185 subgraph (Str "Branches peaks") $ do
187 graphAttrs [Rank SameRank]
189 -- | 3) group the branches by hierarchy
190 -- mapM (\branches ->
191 -- subgraph (Str "Branches clade") $ do
192 -- graphAttrs [Rank SameRank]
194 -- -- | 4) create a node for each branch
195 -- mapM branchToDotNode branches
196 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
198 mapM branchToDotNode $ export ^. export_branches
200 -- | 5) create a layer for each period
201 _ <- mapM (\period ->
202 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
203 graphAttrs [Rank SameRank]
204 periodToDotNode period
206 -- | 6) create a node for each group
207 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
208 ) $ getPeriodIds phylo
210 -- | 7) create the edges between a branch and its first groups
211 _ <- mapM (\(bId,groups) ->
212 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
215 $ map (\groups -> head' "toDot"
216 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
217 $ sortOn (fst . _phylo_groupPeriod) groups)
218 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
220 -- | 8) create the edges between the groups
221 _ <- mapM (\((k,k'),_) ->
222 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
223 ) $ (toList . mergePointers) $ export ^. export_groups
225 -- | 7) create the edges between the periods
226 _ <- mapM (\(prd,prd') ->
227 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
228 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
230 -- | 8) create the edges between the branches
231 _ <- mapM (\(bId,bId') ->
232 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
233 (Text.pack $ show(branchIdsToProximity bId bId'
234 (getThresholdInit $ phyloProximity $ getConfig phylo)
235 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
236 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
239 graphAttrs [Rank SameRank]
249 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
250 filterByBranchSize thr export =
251 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
252 in export & export_branches .~ (fst branches')
253 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
256 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
257 processFilters filters qua export =
258 foldl (\export' f -> case f of
259 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
260 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
261 else filterByBranchSize thr export'
268 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
269 sortByHierarchy depth branches =
270 if (length branches == 1)
274 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
275 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
276 ++ (sortByHierarchy (depth + 1) (snd partitions)))
277 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
278 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
281 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
282 sortByBirthDate order export =
283 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
284 branches' = case order of
286 Desc -> reverse branches
287 in export & export_branches .~ branches'
289 processSort :: Sort -> PhyloExport -> PhyloExport
290 processSort sort' export = case sort' of
291 ByBirthDate o -> sortByBirthDate o export
292 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
299 -- | Return the conditional probability of i knowing j
300 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
301 conditional m i j = (findWithDefault 0 (i,j) m)
305 -- | Return the genericity score of a given ngram
306 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
307 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
308 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
311 -- | Return the specificity score of a given ngram
312 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
313 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
314 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
317 -- | Return the inclusion score of a given ngram
318 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
319 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
320 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
323 ngramsMetrics :: PhyloExport -> PhyloExport
324 ngramsMetrics export =
327 (\g -> g & phylo_groupMeta %~ insert "genericity"
328 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
329 & phylo_groupMeta %~ insert "specificity"
330 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
331 & phylo_groupMeta %~ insert "inclusion"
332 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
336 branchDating :: PhyloExport -> PhyloExport
337 branchDating export =
338 over ( export_branches
341 let groups = sortOn fst
342 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
343 then acc ++ [g ^. phylo_groupPeriod]
344 else acc ) [] $ export ^. export_groups
346 birth = fst $ head' "birth" groups
347 age = (snd $ last' "age" groups) - birth
348 in b & branch_meta %~ insert "birth" [fromIntegral birth]
349 & branch_meta %~ insert "age" [fromIntegral age]
350 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
352 processMetrics :: PhyloExport -> PhyloExport
353 processMetrics export = ngramsMetrics
354 $ branchDating export
361 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
362 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
365 $ sortOn snd $ zip [0..] meta
368 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
369 mostInclusive nth foundations export =
370 over ( export_branches
373 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
374 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
375 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
376 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
377 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
378 in b & branch_label .~ lbl ) export
381 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
382 mostEmergentInclusive nth foundations export =
386 let lbl = ngramsToLabel foundations
388 $ map (\(_,(_,idx)) -> idx)
390 $ map (\groups -> sortOn (fst . snd) groups)
391 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
392 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
393 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
394 in g & phylo_groupLabel .~ lbl ) export
397 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
398 processLabels labels foundations export =
399 foldl (\export' label ->
401 GroupLabel tagger nth ->
403 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
404 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
405 BranchLabel tagger nth ->
407 MostInclusive -> mostInclusive nth foundations export'
408 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
416 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
417 toDynamics n parents group m =
418 let prd = group ^. phylo_groupPeriod
419 end = last' "dynamics" (sort $ map snd $ elems m)
420 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
423 else if ((fst prd) == (fst $ m ! n))
431 --------------------------------------
433 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
436 processDynamics :: [PhyloGroup] -> [PhyloGroup]
437 processDynamics groups =
439 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
440 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
441 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
443 --------------------------------------
444 mapNgrams :: Map Int (Date,Date)
445 mapNgrams = map (\dates ->
446 let dates' = sort dates
447 in (head' "dynamics" dates', last' "dynamics" dates'))
449 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
450 $ (g ^. phylo_groupNgrams))) [] groups
453 ---------------------
454 -- | phyloExport | --
455 ---------------------
458 toPhyloExport :: Phylo -> DotGraph DotId
459 toPhyloExport phylo = exportToDot phylo
460 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
461 $ processSort (exportSort $ getConfig phylo)
462 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
463 $ processMetrics export
465 export :: PhyloExport
466 export = PhyloExport groups branches
467 --------------------------------------
468 branches :: [PhyloBranch]
469 branches = map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
470 --------------------------------------
471 groups :: [PhyloGroup]
472 groups = traceExportGroups
474 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
475 $ tracePhyloInfo phylo
478 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
479 traceExportBranches branches = trace ("\n"
480 <> "-- | Export " <> show(length branches) <> " branches") branches
482 tracePhyloInfo :: Phylo -> Phylo
483 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
484 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
485 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
489 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
490 traceExportGroups groups = trace ("\n" <> "-- | Export "
491 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
492 <> show(length groups) <> " groups and "
493 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"