]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
fix accuracy
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE NoImplicitPrelude #-}
12 {-# LANGUAGE FlexibleContexts #-}
13 {-# LANGUAGE OverloadedStrings #-}
14 {-# LANGUAGE MultiParamTypeClasses #-}
15 {-# LANGUAGE TypeSynonymInstances #-}
16 {-# LANGUAGE FlexibleInstances #-}
17
18 module Gargantext.Viz.Phylo.PhyloExport where
19
20 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
21 import Data.List ((++), sort, nub, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, unwords, nubBy)
22 import Data.Vector (Vector)
23
24 import Prelude (writeFile)
25 import Gargantext.Prelude
26 import Gargantext.Viz.AdaptativePhylo
27 import Gargantext.Viz.Phylo.PhyloTools
28
29 import Control.Lens
30 import Data.GraphViz hiding (DotGraph, Order)
31 import Data.GraphViz.Types.Generalised (DotGraph)
32 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
33 import Data.GraphViz.Types.Monadic
34 import Data.Text.Lazy (fromStrict, pack, unpack)
35 import System.FilePath
36 import Debug.Trace (trace)
37
38 import qualified Data.Text as Text
39 import qualified Data.Text.Lazy as Lazy
40 import qualified Data.GraphViz.Attributes.HTML as H
41
42 --------------------
43 -- | Dot export | --
44 --------------------
45
46 dotToFile :: FilePath -> DotGraph DotId -> IO ()
47 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
48
49 dotToString :: DotGraph DotId -> [Char]
50 dotToString dotG = unpack (printDotGraph dotG)
51
52 dynamicToColor :: Double -> H.Attribute
53 dynamicToColor d
54 | d == 0 = H.BGColor (toColor LightCoral)
55 | d == 1 = H.BGColor (toColor Khaki)
56 | d == 2 = H.BGColor (toColor SkyBlue)
57 | otherwise = H.Color (toColor Black)
58
59 pickLabelColor :: [Double] -> H.Attribute
60 pickLabelColor lst
61 | elem 0 lst = dynamicToColor 0
62 | elem 2 lst = dynamicToColor 2
63 | elem 1 lst = dynamicToColor 1
64 | otherwise = dynamicToColor 3
65
66 toDotLabel :: Text.Text -> Label
67 toDotLabel lbl = StrLabel $ fromStrict lbl
68
69 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
70 toAttr k v = customAttribute k v
71
72 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
73 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
74
75 groupIdToDotId :: PhyloGroupId -> DotId
76 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
77
78 branchIdToDotId :: PhyloBranchId -> DotId
79 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
80
81 periodIdToDotId :: PhyloPeriodId -> DotId
82 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
83
84 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
85 groupToTable fdt g = H.Table H.HTable
86 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
87 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
88 , H.tableRows = [header]
89 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
90 <> ( map ngramsToRow $ splitEvery 4
91 $ reverse $ sortOn (snd . snd)
92 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
93 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
94 where
95 --------------------------------------
96 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
97 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
98 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
100 header :: H.Row
101 header =
102 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> (fromStrict " ( ")
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> (fromStrict " , ")
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> (fromStrict " ) "))]]
109 --------------------------------------
110
111 branchToDotNode :: PhyloBranch -> Dot DotId
112 branchToDotNode b =
113 node (branchIdToDotId $ b ^. branch_id)
114 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
115 <> (metaToAttr $ b ^. branch_meta)
116 <> [ toAttr "nodeType" "branch"
117 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ b ^. branch_id)) ])
118
119 periodToDotNode :: (Date,Date) -> Dot DotId
120 periodToDotNode prd =
121 node (periodIdToDotId prd)
122 ([Shape Square, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
123 <> [ toAttr "nodeType" "period"
124 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
125 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
126
127
128 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Dot DotId
129 groupToDotNode fdt g =
130 node (groupIdToDotId $ getGroupId g)
131 ([FontName "Arial", Shape Square, toLabel (groupToTable fdt g)]
132 <> [ toAttr "nodeType" "group"
133 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
134 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
135 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))])
136
137
138 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
139 toDotEdge source target lbl edgeType = edge source target
140 (case edgeType of
141 GroupToGroup -> [ Width 2, Color [toWColor Black], Constraint True
142 , Label (StrLabel $ fromStrict lbl)]
143 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
144 , Label (StrLabel $ fromStrict lbl)]
145 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
146 , Label (StrLabel $ fromStrict lbl)]
147 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
148
149
150 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
151 mergePointers groups =
152 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
153 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
154 in unionWith (\w w' -> max w w') toChilds toParents
155
156
157 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
158 exportToDot phylo export =
159 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
160 <> show(length $ export ^. export_groups) <> " groups to a dot file\n") $
161 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
162
163 -- | 1) init the dot graph
164 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
165 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
166 , Ratio FillRatio
167 , Style [SItem Filled []],Color [toWColor White]]
168 -- | home made attributes
169 <> [(toAttr (fromStrict "nbDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
170 ,(toAttr (fromStrict "proxiName") $ pack $ show (getProximityName $ phyloProximity $ getConfig phylo))
171 ,(toAttr (fromStrict "proxiInit") $ pack $ show (getProximityInit $ phyloProximity $ getConfig phylo))
172 ,(toAttr (fromStrict "proxiStep") $ pack $ show (getProximityStep $ phyloProximity $ getConfig phylo))
173 ,(toAttr (fromStrict "quaFactor") $ pack $ show (_qua_relevance $ phyloQuality $ getConfig phylo))
174 ])
175
176
177 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
178
179 -- | 2) create a layer for the branches labels
180 subgraph (Str "Branches peaks") $ do
181
182 graphAttrs [Rank SameRank]
183
184 -- | 3) group the branches by hierarchy
185 -- mapM (\branches ->
186 -- subgraph (Str "Branches clade") $ do
187 -- graphAttrs [Rank SameRank]
188
189 -- -- | 4) create a node for each branch
190 -- mapM branchToDotNode branches
191 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
192
193 mapM branchToDotNode $ export ^. export_branches
194
195 -- | 5) create a layer for each period
196 _ <- mapM (\period ->
197 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
198 graphAttrs [Rank SameRank]
199 periodToDotNode period
200
201 -- | 6) create a node for each group
202 mapM (\g -> groupToDotNode (getRoots phylo) g) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
203 ) $ getPeriodIds phylo
204
205 -- | 7) create the edges between a branch and its first groups
206 _ <- mapM (\(bId,groups) ->
207 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
208 )
209 $ toList
210 $ map (\groups -> head' "toDot"
211 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
212 $ sortOn (fst . _phylo_groupPeriod) groups)
213 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
214
215 -- | 8) create the edges between the groups
216 _ <- mapM (\((k,k'),_) ->
217 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
218 ) $ (toList . mergePointers) $ export ^. export_groups
219
220 -- | 7) create the edges between the periods
221 _ <- mapM (\(prd,prd') ->
222 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
223 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
224
225 -- | 8) create the edges between the branches
226 _ <- mapM (\(bId,bId') ->
227 toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
228 (Text.pack $ show(branchIdsToProximity bId bId'
229 (getThresholdInit $ phyloProximity $ getConfig phylo)
230 (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
231 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
232
233
234 graphAttrs [Rank SameRank]
235
236
237
238
239
240 ----------------
241 -- | Filter | --
242 ----------------
243
244 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
245 filterByBranchSize thr export =
246 let branches' = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
247 in export & export_branches .~ (fst branches')
248 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd branches')))
249
250
251 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
252 processFilters filters qua export =
253 foldl (\export' f -> case f of
254 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
255 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
256 else filterByBranchSize thr export'
257 ) export filters
258
259 --------------
260 -- | Sort | --
261 --------------
262
263 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
264 sortByHierarchy depth branches =
265 if (length branches == 1)
266 then branches
267 else concat
268 $ map (\branches' ->
269 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
270 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
271 ++ (sortByHierarchy (depth + 1) (snd partitions)))
272 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
273 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
274
275
276 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
277 sortByBirthDate order export =
278 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
279 branches' = case order of
280 Asc -> branches
281 Desc -> reverse branches
282 in export & export_branches .~ branches'
283
284 processSort :: Sort -> PhyloExport -> PhyloExport
285 processSort sort' export = case sort' of
286 ByBirthDate o -> sortByBirthDate o export
287 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
288
289
290 -----------------
291 -- | Metrics | --
292 -----------------
293
294 -- | Return the conditional probability of i knowing j
295 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
296 conditional m i j = (findWithDefault 0 (i,j) m)
297 / (m ! (j,j))
298
299
300 -- | Return the genericity score of a given ngram
301 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
302 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
303 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
304
305
306 -- | Return the specificity score of a given ngram
307 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
308 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
309 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
310
311
312 -- | Return the inclusion score of a given ngram
313 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
314 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
315 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
316
317
318 ngramsMetrics :: PhyloExport -> PhyloExport
319 ngramsMetrics export =
320 over ( export_groups
321 . traverse )
322 (\g -> g & phylo_groupMeta %~ insert "genericity"
323 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
324 & phylo_groupMeta %~ insert "specificity"
325 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
326 & phylo_groupMeta %~ insert "inclusion"
327 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
328 ) export
329
330
331 branchDating :: PhyloExport -> PhyloExport
332 branchDating export =
333 over ( export_branches
334 . traverse )
335 (\b ->
336 let groups = sortOn fst
337 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
338 then acc ++ [g ^. phylo_groupPeriod]
339 else acc ) [] $ export ^. export_groups
340 birth = fst $ head' "birth" groups
341 age = (snd $ last' "age" groups) - birth
342 in b & branch_meta %~ insert "birth" [fromIntegral birth]
343 & branch_meta %~ insert "age" [fromIntegral age]
344 & branch_meta %~ insert "size" [fromIntegral $ length groups] ) export
345
346 processMetrics :: PhyloExport -> PhyloExport
347 processMetrics export = ngramsMetrics
348 $ branchDating export
349
350
351 -----------------
352 -- | Taggers | --
353 -----------------
354
355 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
356 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
357 $ take nth
358 $ reverse
359 $ sortOn snd $ zip [0..] meta
360
361
362 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
363 mostInclusive nth foundations export =
364 over ( export_branches
365 . traverse )
366 (\b ->
367 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
368 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
369 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
370 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
371 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
372 in b & branch_label .~ lbl ) export
373
374
375 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
376 mostEmergentInclusive nth foundations export =
377 over ( export_groups
378 . traverse )
379 (\g ->
380 let lbl = ngramsToLabel foundations
381 $ take nth
382 $ map (\(_,(_,idx)) -> idx)
383 $ concat
384 $ map (\groups -> sortOn (fst . snd) groups)
385 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
386 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
387 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
388 in g & phylo_groupLabel .~ lbl ) export
389
390
391 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
392 processLabels labels foundations export =
393 foldl (\export' label ->
394 case label of
395 GroupLabel tagger nth ->
396 case tagger of
397 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
398 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
399 BranchLabel tagger nth ->
400 case tagger of
401 MostInclusive -> mostInclusive nth foundations export'
402 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
403
404
405 ------------------
406 -- | Dynamics | --
407 ------------------
408
409
410 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
411 toDynamics n parents group m =
412 let prd = group ^. phylo_groupPeriod
413 end = last' "dynamics" (sort $ map snd $ elems m)
414 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
415 -- | decrease
416 then 2
417 else if ((fst prd) == (fst $ m ! n))
418 -- | recombination
419 then 0
420 else if isNew
421 -- | emergence
422 then 1
423 else 3
424 where
425 --------------------------------------
426 isNew :: Bool
427 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
428
429
430 processDynamics :: [PhyloGroup] -> [PhyloGroup]
431 processDynamics groups =
432 map (\g ->
433 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
434 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
435 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
436 where
437 --------------------------------------
438 mapNgrams :: Map Int (Date,Date)
439 mapNgrams = map (\dates ->
440 let dates' = sort dates
441 in (head' "dynamics" dates', last' "dynamics" dates'))
442 $ fromListWith (++)
443 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
444 $ (g ^. phylo_groupNgrams))) [] groups
445
446
447 ---------------------
448 -- | phyloExport | --
449 ---------------------
450
451
452 toPhyloExport :: Phylo -> DotGraph DotId
453 toPhyloExport phylo = exportToDot phylo
454 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
455 $ processSort (exportSort $ getConfig phylo)
456 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
457 $ processMetrics export
458 where
459 export :: PhyloExport
460 export = PhyloExport groups branches
461 --------------------------------------
462 branches :: [PhyloBranch]
463 branches = traceExportBranches $ map (\bId -> PhyloBranch bId "" empty) $ nub $ map _phylo_groupBranchId groups
464 --------------------------------------
465 groups :: [PhyloGroup]
466 groups = processDynamics
467 $ getGroupsFromLevel (phyloLevel $ getConfig phylo) phylo
468
469
470 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
471 traceExportBranches branches = trace ("\n" <> "-- | Export " <> show(length branches) <> " branches") branches