]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[Comment] last changes commented
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
16 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
18
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
24
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
34
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
39
40 --------------------
41 -- | Dot export | --
42 --------------------
43
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
46
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
49
50 dynamicToColor :: Double -> H.Attribute
51 dynamicToColor d
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
56
57 pickLabelColor :: [Double] -> H.Attribute
58 pickLabelColor lst
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
63
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
66
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
69
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
72
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
75
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
78
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
81
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
92 where
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
98 header :: H.Row
99 header =
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
109
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
121 ])
122
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130
131
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "label" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
145 ])
146
147
148 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
149 toDotEdge source target lbl edgeType = edge source target
150 (case edgeType of
151 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
152 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
153 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
154 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
155 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
156 , Label (StrLabel $ fromStrict lbl)]
157 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
158 , Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
159 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
160
161
162 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
163 mergePointers groups =
164 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
165 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
166 in unionWith (\w w' -> max w w') toChilds toParents
167
168 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
169 mergeAncestors groups = concat
170 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
171 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
172
173
174 toBid :: PhyloGroup -> [PhyloBranch] -> Int
175 toBid g bs =
176 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
177 in fromJust $ elemIndex b' bs
178
179 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
180 exportToDot phylo export =
181 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
182 <> show(length $ export ^. export_groups) <> " groups "
183 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
184 <> "##########################") $
185 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
186
187 {- 1) init the dot graph -}
188 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
189 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
190 , Ratio FillRatio
191 , Style [SItem Filled []],Color [toWColor White]]
192 {-- home made attributes -}
193 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
194 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
195 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
196 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
197 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
198 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
199 ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
200 ])
201
202 {-
203 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
204
205 -- 2) create a layer for the branches labels -}
206 subgraph (Str "Branches peaks") $ do
207
208 graphAttrs [Rank SameRank]
209 {-
210 -- 3) group the branches by hierarchy
211 -- mapM (\branches ->
212 -- subgraph (Str "Branches clade") $ do
213 -- graphAttrs [Rank SameRank]
214
215 -- -- 4) create a node for each branch
216 -- mapM branchToDotNode branches
217 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
218 -}
219 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
220
221 {-- 5) create a layer for each period -}
222 _ <- mapM (\period ->
223 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
224 graphAttrs [Rank SameRank]
225 periodToDotNode period
226
227 {-- 6) create a node for each group -}
228 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
229 ) $ getPeriodIds phylo
230
231 {-- 7) create the edges between a branch and its first groups -}
232 _ <- mapM (\(bId,groups) ->
233 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
234 )
235 $ toList
236 $ map (\groups -> head' "toDot"
237 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
238 $ sortOn (fst . _phylo_groupPeriod) groups)
239 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
240
241 {- 8) create the edges between the groups -}
242 _ <- mapM (\((k,k'),_) ->
243 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
244 ) $ (toList . mergePointers) $ export ^. export_groups
245
246 _ <- mapM (\((k,k'),_) ->
247 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
248 ) $ mergeAncestors $ export ^. export_groups
249
250 -- 10) create the edges between the periods
251 _ <- mapM (\(prd,prd') ->
252 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
253 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
254
255 {- 8) create the edges between the branches
256 -- _ <- mapM (\(bId,bId') ->
257 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
258 -- (Text.pack $ show(branchIdsToProximity bId bId'
259 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
260 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
261 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
262 -}
263
264
265 graphAttrs [Rank SameRank]
266
267
268 ----------------
269 -- | Filter | --
270 ----------------
271
272 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
273 filterByBranchSize thr export =
274 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
275 in export & export_branches .~ (fst splited)
276 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
277
278
279 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
280 processFilters filters qua export =
281 foldl (\export' f -> case f of
282 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
283 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
284 else filterByBranchSize thr export'
285 ) export filters
286
287 --------------
288 -- | Sort | --
289 --------------
290
291 branchToIso :: [PhyloBranch] -> [PhyloBranch]
292 branchToIso branches =
293 let steps = map sum
294 $ inits
295 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
296 $ zip branches
297 $ ([0] ++ (map (\(b,b') ->
298 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
299 in (b' ^. branch_seaLevel) !! (idx - 1)
300 ) $ listToSeq branches))
301 in map (\(x,b) -> b & branch_x .~ x)
302 $ zip steps branches
303
304
305 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
306 sortByHierarchy depth branches =
307 if (length branches == 1)
308 then branchToIso branches
309 else branchToIso $ concat
310 $ map (\branches' ->
311 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
312 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
313 ++ (sortByHierarchy (depth + 1) (snd partitions)))
314 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
315 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
316
317
318 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
319 sortByBirthDate order export =
320 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
321 branches' = case order of
322 Asc -> branches
323 Desc -> reverse branches
324 in export & export_branches .~ branches'
325
326 processSort :: Sort -> PhyloExport -> PhyloExport
327 processSort sort' export = case sort' of
328 ByBirthDate o -> sortByBirthDate o export
329 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
330
331
332 -----------------
333 -- | Metrics | --
334 -----------------
335
336 -- | Return the conditional probability of i knowing j
337 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
338 conditional m i j = (findWithDefault 0 (i,j) m)
339 / (m ! (j,j))
340
341
342 -- | Return the genericity score of a given ngram
343 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
344 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
345 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
346
347
348 -- | Return the specificity score of a given ngram
349 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
350 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
351 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
352
353
354 -- | Return the inclusion score of a given ngram
355 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
356 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
357 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
358
359
360 ngramsMetrics :: PhyloExport -> PhyloExport
361 ngramsMetrics export =
362 over ( export_groups
363 . traverse )
364 (\g -> g & phylo_groupMeta %~ insert "genericity"
365 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
366 & phylo_groupMeta %~ insert "specificity"
367 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
368 & phylo_groupMeta %~ insert "inclusion"
369 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
370 ) export
371
372
373 branchDating :: PhyloExport -> PhyloExport
374 branchDating export =
375 over ( export_branches
376 . traverse )
377 (\b ->
378 let groups = sortOn fst
379 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
380 then acc ++ [g ^. phylo_groupPeriod]
381 else acc ) [] $ export ^. export_groups
382 periods = nub groups
383 birth = fst $ head' "birth" groups
384 age = (snd $ last' "age" groups) - birth
385 in b & branch_meta %~ insert "birth" [fromIntegral birth]
386 & branch_meta %~ insert "age" [fromIntegral age]
387 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
388
389 processMetrics :: PhyloExport -> PhyloExport
390 processMetrics export = ngramsMetrics
391 $ branchDating export
392
393
394 -----------------
395 -- | Taggers | --
396 -----------------
397
398 nk :: Int -> [[Int]] -> Int
399 nk n groups = sum
400 $ map (\g -> if (elem n g)
401 then 1
402 else 0) groups
403
404
405 tf :: Int -> [[Int]] -> Double
406 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
407
408
409 idf :: Int -> [[Int]] -> Double
410 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
411
412
413 findTfIdf :: [[Int]] -> [(Int,Double)]
414 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
415
416
417 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
418 findEmergences groups freq =
419 let ngrams = map _phylo_groupNgrams groups
420 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
421 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
422 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
423 in reverse $ sortOn snd
424 $ map (\(n,_) -> if (member n freq)
425 then (n,freq ! n)
426 else (n,0)) emerging
427
428
429 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
430 mostEmergentTfIdf nth freq foundations export =
431 over ( export_branches
432 . traverse )
433 (\b ->
434 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
435 tfidf = findTfIdf (map _phylo_groupNgrams groups)
436 emergences = findEmergences groups freq
437 selected = if (null emergences)
438 then map fst $ take nth tfidf
439 else [fst $ head' "mostEmergentTfIdf" emergences]
440 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
441 in b & branch_label .~ (ngramsToLabel foundations selected)) export
442
443
444 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
445 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
446 $ take nth
447 $ reverse
448 $ sortOn snd $ zip [0..] meta
449
450
451 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
452 mostInclusive nth foundations export =
453 over ( export_branches
454 . traverse )
455 (\b ->
456 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
457 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
458 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
459 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
460 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
461 in b & branch_label .~ lbl ) export
462
463
464 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
465 mostEmergentInclusive nth foundations export =
466 over ( export_groups
467 . traverse )
468 (\g ->
469 let lbl = ngramsToLabel foundations
470 $ take nth
471 $ map (\(_,(_,idx)) -> idx)
472 $ concat
473 $ map (\groups -> sortOn (fst . snd) groups)
474 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
475 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
476 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
477 in g & phylo_groupLabel .~ lbl ) export
478
479
480 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
481 processLabels labels foundations freq export =
482 foldl (\export' label ->
483 case label of
484 GroupLabel tagger nth ->
485 case tagger of
486 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
487 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
488 BranchLabel tagger nth ->
489 case tagger of
490 MostInclusive -> mostInclusive nth foundations export'
491 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
492 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
493
494
495 ------------------
496 -- | Dynamics | --
497 ------------------
498
499
500 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
501 toDynamics n parents g m =
502 let prd = g ^. phylo_groupPeriod
503 end = last' "dynamics" (sort $ map snd $ elems m)
504 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
505 {- decrease -}
506 then 2
507 else if ((fst prd) == (fst $ m ! n))
508 {- emerging -}
509 then 0
510 else if isNew
511 {- emergence -}
512 then 1
513 else 3
514 where
515 --------------------------------------
516 isNew :: Bool
517 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
518
519
520 processDynamics :: [PhyloGroup] -> [PhyloGroup]
521 processDynamics groups =
522 map (\g ->
523 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
524 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
525 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
526 where
527 --------------------------------------
528 mapNgrams :: Map Int (Date,Date)
529 mapNgrams = map (\dates ->
530 let dates' = sort dates
531 in (head' "dynamics" dates', last' "dynamics" dates'))
532 $ fromListWith (++)
533 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
534 $ (g ^. phylo_groupNgrams))) [] groups
535
536
537 -----------------
538 -- | horizon | --
539 -----------------
540
541 getGroupThr :: Double -> PhyloGroup -> Double
542 getGroupThr step g =
543 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
544 breaks = (g ^. phylo_groupMeta) ! "breaks"
545 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
546
547 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
548 toAncestor nbDocs diago proximity step candidates ego =
549 let curr = ego ^. phylo_groupAncestors
550 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
551 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
552 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
553 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
554
555
556 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
557 headsToAncestors nbDocs diago proximity step heads acc =
558 if (null heads)
559 then acc
560 else
561 let ego = head' "headsToAncestors" heads
562 heads' = tail' "headsToAncestors" heads
563 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
564
565
566 toHorizon :: Phylo -> Phylo
567 toHorizon phylo =
568 let phyloAncestor = updatePhyloGroups
569 level
570 (fromList $ map (\g -> (getGroupId g, g))
571 $ concat
572 $ tracePhyloAncestors newGroups) phylo
573 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
574 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
575 in updatePhyloGroups level reBranched phylo
576 where
577 -- | 1) for each periods
578 periods :: [PhyloPeriodId]
579 periods = getPeriodIds phylo
580 -- --
581 level :: Level
582 level = getLastLevel phylo
583 -- --
584 frame :: Int
585 frame = getTimeFrame $ timeUnit $ getConfig phylo
586 -- | 2) find ancestors between groups without parents
587 mapGroups :: [[PhyloGroup]]
588 mapGroups = map (\prd ->
589 let groups = getGroupsFromLevelPeriods level [prd] phylo
590 childs = getPreviousChildIds level frame prd periods phylo
591 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
592 noHeads = groups \\ heads
593 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
594 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
595 proximity = (phyloProximity $ getConfig phylo)
596 step = case getSeaElevation phylo of
597 Constante _ s -> s
598 Adaptative _ -> undefined
599 -- in headsToAncestors nbDocs diago proximity heads groups []
600 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
601 $ headsToAncestors nbDocs diago proximity step heads []
602 ) periods
603 -- | 3) process this task concurrently
604 newGroups :: [[PhyloGroup]]
605 newGroups = mapGroups `using` parList rdeepseq
606 --------------------------------------
607
608 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
609 getPreviousChildIds lvl frame curr prds phylo =
610 concat $ map ((map fst) . _phylo_groupPeriodChilds)
611 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
612
613 ---------------------
614 -- | phyloExport | --
615 ---------------------
616
617 toPhyloExport :: Phylo -> DotGraph DotId
618 toPhyloExport phylo = exportToDot phylo
619 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
620 $ processSort (exportSort $ getConfig phylo)
621 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
622 $ processMetrics export
623 where
624 export :: PhyloExport
625 export = PhyloExport groups branches
626 --------------------------------------
627 branches :: [PhyloBranch]
628 branches = map (\g ->
629 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
630 breaks = (g ^. phylo_groupMeta) ! "breaks"
631 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
632 in PhyloBranch (g ^. phylo_groupBranchId)
633 canonId
634 seaLvl
635 0
636 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
637 0
638 0
639 "" empty)
640 $ map (\gs -> head' "export" gs)
641 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
642 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
643 --------------------------------------
644 groups :: [PhyloGroup]
645 groups = traceExportGroups
646 $ processDynamics
647 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
648 $ tracePhyloInfo
649 $ toHorizon phylo
650
651
652 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
653 traceExportBranches branches = trace ("\n"
654 <> "-- | Export " <> show(length branches) <> " branches") branches
655
656 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
657 tracePhyloAncestors groups = trace ("\n"
658 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
659 ) groups
660
661 tracePhyloInfo :: Phylo -> Phylo
662 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
663 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
664 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
665 ) phylo
666
667
668 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
669 traceExportGroups groups = trace ("\n" <> "-- | Export "
670 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
671 <> show(length groups) <> " groups and "
672 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
673 ) groups
674