]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[phylo] filter threshold changed
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
16 import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
18
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
24
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
34
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
39
40 --------------------
41 -- | Dot export | --
42 --------------------
43
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
46
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
49
50 dynamicToColor :: Double -> H.Attribute
51 dynamicToColor d
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
56
57 pickLabelColor :: [Double] -> H.Attribute
58 pickLabelColor lst
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
63
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
66
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
69
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
72
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
75
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
78
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
81
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
92 where
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
98 header :: H.Row
99 header =
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
109
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
121 ])
122
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130
131
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
145 ])
146
147
148 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
149 toDotEdge source target lbl edgeType = edge source target
150 (case edgeType of
151 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl)]
152 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
153 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
154 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl)]
155 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
156
157
158 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
159 mergePointers groups =
160 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
161 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
162 in unionWith (\w w' -> max w w') toChilds toParents
163
164 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
165 mergeAncestors groups = concat
166 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
167 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
168
169
170 toBid :: PhyloGroup -> [PhyloBranch] -> Int
171 toBid g bs =
172 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
173 in fromJust $ elemIndex b' bs
174
175 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
176 exportToDot phylo export =
177 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
178 <> show(length $ export ^. export_groups) <> " groups "
179 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
180 <> "##########################") $
181 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
182
183 {- 1) init the dot graph -}
184 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
185 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
186 , Ratio FillRatio
187 , Style [SItem Filled []],Color [toWColor White]]
188 {-- home made attributes -}
189 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
190 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
191 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
192 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
193 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
194 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
195 ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
196 ])
197
198 {-
199 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
200
201 -- 2) create a layer for the branches labels -}
202 subgraph (Str "Branches peaks") $ do
203
204 graphAttrs [Rank SameRank]
205 {-
206 -- 3) group the branches by hierarchy
207 -- mapM (\branches ->
208 -- subgraph (Str "Branches clade") $ do
209 -- graphAttrs [Rank SameRank]
210
211 -- -- 4) create a node for each branch
212 -- mapM branchToDotNode branches
213 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
214 -}
215 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
216
217 {-- 5) create a layer for each period -}
218 _ <- mapM (\period ->
219 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
220 graphAttrs [Rank SameRank]
221 periodToDotNode period
222
223 {-- 6) create a node for each group -}
224 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
225 ) $ getPeriodIds phylo
226
227 {-- 7) create the edges between a branch and its first groups -}
228 _ <- mapM (\(bId,groups) ->
229 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
230 )
231 $ toList
232 $ map (\groups -> head' "toDot"
233 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
234 $ sortOn (fst . _phylo_groupPeriod) groups)
235 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
236
237 {- 8) create the edges between the groups -}
238 _ <- mapM (\((k,k'),v) ->
239 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
240 ) $ (toList . mergePointers) $ export ^. export_groups
241
242 _ <- mapM (\((k,k'),v) ->
243 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
244 ) $ mergeAncestors $ export ^. export_groups
245
246 -- | 10) create the edges between the periods
247 _ <- mapM (\(prd,prd') ->
248 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
249 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
250
251 {- 8) create the edges between the branches
252 -- _ <- mapM (\(bId,bId') ->
253 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
254 -- (Text.pack $ show(branchIdsToProximity bId bId'
255 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
256 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
257 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
258 -}
259
260
261 graphAttrs [Rank SameRank]
262
263
264 ----------------
265 -- | Filter | --
266 ----------------
267
268 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
269 filterByBranchSize thr export =
270 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
271 in export & export_branches .~ (fst splited)
272 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
273
274
275 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
276 processFilters filters qua export =
277 foldl (\export' f -> case f of
278 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
279 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
280 else filterByBranchSize thr export'
281 ) export filters
282
283 --------------
284 -- | Sort | --
285 --------------
286
287 branchToIso :: [PhyloBranch] -> [PhyloBranch]
288 branchToIso branches =
289 let steps = map sum
290 $ inits
291 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
292 $ zip branches
293 $ ([0] ++ (map (\(b,b') ->
294 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
295 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
296 in
297 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
298 then (b' ^. branch_seaLevel) !! (lmin - 1)
299 else (b' ^. branch_seaLevel) !! (idx - 1)
300 ) $ listToSeq branches))
301 in map (\(x,b) -> b & branch_x .~ x)
302 $ zip steps branches
303
304 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
305 branchToIso' start step branches =
306 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
307 $ inits
308 $ ([0] ++ (map (\(b,b') ->
309 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
310 in 1 - start - step * root) $ listToSeq branches))
311 in map (\(x,b) -> b & branch_x .~ x)
312 $ zip bx branches
313
314
315 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
316 sortByHierarchy depth branches =
317 if (length branches == 1)
318 then branches
319 else concat
320 $ map (\branches' ->
321 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
322 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
323 ++ (sortByHierarchy (depth + 1) (snd partitions)))
324 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
325 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
326
327
328 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
329 sortByBirthDate order export =
330 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
331 branches' = case order of
332 Asc -> branches
333 Desc -> reverse branches
334 in export & export_branches .~ branches'
335
336 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
337 processSort sort' elev export = case sort' of
338 ByBirthDate o -> sortByBirthDate o export
339 ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
340 $ sortByHierarchy 0 (export ^. export_branches))
341
342
343 -----------------
344 -- | Metrics | --
345 -----------------
346
347 -- | Return the conditional probability of i knowing j
348 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
349 conditional m i j = (findWithDefault 0 (i,j) m)
350 / (m ! (j,j))
351
352
353 -- | Return the genericity score of a given ngram
354 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
355 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
356 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
357
358
359 -- | Return the specificity score of a given ngram
360 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
361 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
362 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
363
364
365 -- | Return the inclusion score of a given ngram
366 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
367 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
368 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
369
370
371 ngramsMetrics :: PhyloExport -> PhyloExport
372 ngramsMetrics export =
373 over ( export_groups
374 . traverse )
375 (\g -> g & phylo_groupMeta %~ insert "genericity"
376 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
377 & phylo_groupMeta %~ insert "specificity"
378 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
379 & phylo_groupMeta %~ insert "inclusion"
380 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
381 ) export
382
383
384 branchDating :: PhyloExport -> PhyloExport
385 branchDating export =
386 over ( export_branches
387 . traverse )
388 (\b ->
389 let groups = sortOn fst
390 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
391 then acc ++ [g ^. phylo_groupPeriod]
392 else acc ) [] $ export ^. export_groups
393 periods = nub groups
394 birth = fst $ head' "birth" groups
395 age = (snd $ last' "age" groups) - birth
396 in b & branch_meta %~ insert "birth" [fromIntegral birth]
397 & branch_meta %~ insert "age" [fromIntegral age]
398 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
399
400 processMetrics :: PhyloExport -> PhyloExport
401 processMetrics export = ngramsMetrics
402 $ branchDating export
403
404
405 -----------------
406 -- | Taggers | --
407 -----------------
408
409 nk :: Int -> [[Int]] -> Int
410 nk n groups = sum
411 $ map (\g -> if (elem n g)
412 then 1
413 else 0) groups
414
415
416 tf :: Int -> [[Int]] -> Double
417 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
418
419
420 idf :: Int -> [[Int]] -> Double
421 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
422
423
424 findTfIdf :: [[Int]] -> [(Int,Double)]
425 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
426
427
428 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
429 findEmergences groups freq =
430 let ngrams = map _phylo_groupNgrams groups
431 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
432 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
433 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
434 in reverse $ sortOn snd
435 $ map (\(n,_) -> if (member n freq)
436 then (n,freq ! n)
437 else (n,0)) emerging
438
439
440 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
441 mostEmergentTfIdf nth freq foundations export =
442 over ( export_branches
443 . traverse )
444 (\b ->
445 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
446 tfidf = findTfIdf (map _phylo_groupNgrams groups)
447 emergences = findEmergences groups freq
448 selected = if (null emergences)
449 then map fst $ take nth tfidf
450 else [fst $ head' "mostEmergentTfIdf" emergences]
451 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
452 in b & branch_label .~ (ngramsToLabel foundations selected)) export
453
454
455 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
456 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
457 $ take nth
458 $ reverse
459 $ sortOn snd $ zip [0..] meta
460
461
462 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
463 mostInclusive nth foundations export =
464 over ( export_branches
465 . traverse )
466 (\b ->
467 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
468 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
469 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
470 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
471 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
472 in b & branch_label .~ lbl ) export
473
474
475 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
476 mostEmergentInclusive nth foundations export =
477 over ( export_groups
478 . traverse )
479 (\g ->
480 let lbl = ngramsToLabel foundations
481 $ take nth
482 $ map (\(_,(_,idx)) -> idx)
483 $ concat
484 $ map (\groups -> sortOn (fst . snd) groups)
485 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
486 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
487 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
488 in g & phylo_groupLabel .~ lbl ) export
489
490
491 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
492 processLabels labels foundations freq export =
493 foldl (\export' label ->
494 case label of
495 GroupLabel tagger nth ->
496 case tagger of
497 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
498 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
499 BranchLabel tagger nth ->
500 case tagger of
501 MostInclusive -> mostInclusive nth foundations export'
502 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
503 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
504
505
506 ------------------
507 -- | Dynamics | --
508 ------------------
509
510
511 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
512 toDynamics n parents g m =
513 let prd = g ^. phylo_groupPeriod
514 end = last' "dynamics" (sort $ map snd $ elems m)
515 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
516 {- decrease -}
517 then 2
518 else if ((fst prd) == (fst $ m ! n))
519 {- emerging -}
520 then 0
521 else if isNew
522 {- emergence -}
523 then 1
524 else 3
525 where
526 --------------------------------------
527 isNew :: Bool
528 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
529
530
531 processDynamics :: [PhyloGroup] -> [PhyloGroup]
532 processDynamics groups =
533 map (\g ->
534 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
535 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
536 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
537 where
538 --------------------------------------
539 mapNgrams :: Map Int (Date,Date)
540 mapNgrams = map (\dates ->
541 let dates' = sort dates
542 in (head' "dynamics" dates', last' "dynamics" dates'))
543 $ fromListWith (++)
544 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
545 $ (g ^. phylo_groupNgrams))) [] groups
546
547
548 -----------------
549 -- | horizon | --
550 -----------------
551
552 getGroupThr :: Double -> PhyloGroup -> Double
553 getGroupThr step g =
554 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
555 breaks = (g ^. phylo_groupMeta) ! "breaks"
556 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
557
558 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
559 toAncestor nbDocs diago proximity step candidates ego =
560 let curr = ego ^. phylo_groupAncestors
561 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
562 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
563 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
564 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
565
566
567 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
568 headsToAncestors nbDocs diago proximity step heads acc =
569 if (null heads)
570 then acc
571 else
572 let ego = head' "headsToAncestors" heads
573 heads' = tail' "headsToAncestors" heads
574 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
575
576
577 toHorizon :: Phylo -> Phylo
578 toHorizon phylo =
579 let phyloAncestor = updatePhyloGroups
580 level
581 (fromList $ map (\g -> (getGroupId g, g))
582 $ concat
583 $ tracePhyloAncestors newGroups) phylo
584 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
585 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
586 in updatePhyloGroups level reBranched phylo
587 where
588 -- | 1) for each periods
589 periods :: [PhyloPeriodId]
590 periods = getPeriodIds phylo
591 -- --
592 level :: Level
593 level = getLastLevel phylo
594 -- --
595 frame :: Int
596 frame = getTimeFrame $ timeUnit $ getConfig phylo
597 -- | 2) find ancestors between groups without parents
598 mapGroups :: [[PhyloGroup]]
599 mapGroups = map (\prd ->
600 let groups = getGroupsFromLevelPeriods level [prd] phylo
601 childs = getPreviousChildIds level frame prd periods phylo
602 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
603 noHeads = groups \\ heads
604 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
605 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
606 proximity = (phyloProximity $ getConfig phylo)
607 step = case getSeaElevation phylo of
608 Constante _ s -> s
609 Adaptative _ -> undefined
610 -- in headsToAncestors nbDocs diago proximity heads groups []
611 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
612 $ headsToAncestors nbDocs diago proximity step heads []
613 ) periods
614 -- | 3) process this task concurrently
615 newGroups :: [[PhyloGroup]]
616 newGroups = mapGroups `using` parList rdeepseq
617 --------------------------------------
618
619 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
620 getPreviousChildIds lvl frame curr prds phylo =
621 concat $ map ((map fst) . _phylo_groupPeriodChilds)
622 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
623
624 ---------------------
625 -- | phyloExport | --
626 ---------------------
627
628 toPhyloExport :: Phylo -> DotGraph DotId
629 toPhyloExport phylo = exportToDot phylo
630 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
631 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
632 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
633 $ processMetrics export
634 where
635 export :: PhyloExport
636 export = PhyloExport groups branches
637 --------------------------------------
638 branches :: [PhyloBranch]
639 branches = map (\g ->
640 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
641 breaks = (g ^. phylo_groupMeta) ! "breaks"
642 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
643 in PhyloBranch (g ^. phylo_groupBranchId)
644 canonId
645 seaLvl
646 0
647 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
648 0
649 0
650 "" empty)
651 $ map (\gs -> head' "export" gs)
652 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
653 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
654 --------------------------------------
655 groups :: [PhyloGroup]
656 groups = traceExportGroups
657 $ processDynamics
658 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
659 $ tracePhyloInfo
660 $ toHorizon phylo
661
662
663 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
664 traceExportBranches branches = trace ("\n"
665 <> "-- | Export " <> show(length branches) <> " branches") branches
666
667 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
668 tracePhyloAncestors groups = trace ("\n"
669 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
670 ) groups
671
672 tracePhyloInfo :: Phylo -> Phylo
673 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
674 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
675 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
676 ) phylo
677
678
679 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
680 traceExportGroups groups = trace ("\n" <> "-- | Export "
681 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
682 <> show(length groups) <> " groups and "
683 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
684 ) groups
685