]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[ngrams] add score update endpoint + sorting
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
16 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
18
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
24
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
34
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
39
40 --------------------
41 -- | Dot export | --
42 --------------------
43
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
46
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
49
50 dynamicToColor :: Double -> H.Attribute
51 dynamicToColor d
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
56
57 pickLabelColor :: [Double] -> H.Attribute
58 pickLabelColor lst
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
63
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
66
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
69
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
72
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
75
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
78
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
81
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
92 where
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
98 header :: H.Row
99 header =
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
109
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
121 ])
122
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130
131
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
145 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
146 ])
147
148
149 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
150 toDotEdge source target lbl edgeType = edge source target
151 (case edgeType of
152 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl)]
153 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
154 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
155 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl)]
156 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
157
158
159 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
160 mergePointers groups =
161 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
162 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
163 in unionWith (\w w' -> max w w') toChilds toParents
164
165 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
166 mergeAncestors groups = concat
167 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
168 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
169
170
171 toBid :: PhyloGroup -> [PhyloBranch] -> Int
172 toBid g bs =
173 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
174 in fromJust $ elemIndex b' bs
175
176 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
177 exportToDot phylo export =
178 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
179 <> show(length $ export ^. export_groups) <> " groups "
180 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
181 <> "##########################") $
182 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
183
184 {- 1) init the dot graph -}
185 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
186 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
187 , Ratio FillRatio
188 , Style [SItem Filled []],Color [toWColor White]]
189 {-- home made attributes -}
190 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
191 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
192 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
193 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
194 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
195 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
196 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
197 ])
198
199 {-
200 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
201
202 -- 2) create a layer for the branches labels -}
203 subgraph (Str "Branches peaks") $ do
204
205 -- graphAttrs [Rank SameRank]
206 {-
207 -- 3) group the branches by hierarchy
208 -- mapM (\branches ->
209 -- subgraph (Str "Branches clade") $ do
210 -- graphAttrs [Rank SameRank]
211
212 -- -- 4) create a node for each branch
213 -- mapM branchToDotNode branches
214 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
215 -}
216 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
217
218 {-- 5) create a layer for each period -}
219 _ <- mapM (\period ->
220 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
221 graphAttrs [Rank SameRank]
222 periodToDotNode period
223
224 {-- 6) create a node for each group -}
225 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
226 ) $ getPeriodIds phylo
227
228 {-- 7) create the edges between a branch and its first groups -}
229 _ <- mapM (\(bId,groups) ->
230 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
231 )
232 $ toList
233 $ map (\groups -> head' "toDot"
234 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
235 $ sortOn (fst . _phylo_groupPeriod) groups)
236 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
237
238 {- 8) create the edges between the groups -}
239 _ <- mapM (\((k,k'),v) ->
240 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
241 ) $ (toList . mergePointers) $ export ^. export_groups
242
243 _ <- mapM (\((k,k'),v) ->
244 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
245 ) $ mergeAncestors $ export ^. export_groups
246
247 -- 10) create the edges between the periods
248 _ <- mapM (\(prd,prd') ->
249 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
250 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
251
252 {- 8) create the edges between the branches
253 -- _ <- mapM (\(bId,bId') ->
254 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
255 -- (Text.pack $ show(branchIdsToProximity bId bId'
256 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
257 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
258 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
259 -}
260
261
262 graphAttrs [Rank SameRank]
263
264
265 ----------------
266 -- | Filter | --
267 ----------------
268
269 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
270 filterByBranchSize thr export =
271 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
272 in export & export_branches .~ (fst splited)
273 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
274
275
276 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
277 processFilters filters qua export =
278 foldl (\export' f -> case f of
279 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
280 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
281 else filterByBranchSize thr export'
282 ) export filters
283
284 --------------
285 -- | Sort | --
286 --------------
287
288 branchToIso :: [PhyloBranch] -> [PhyloBranch]
289 branchToIso branches =
290 let steps = map sum
291 $ inits
292 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
293 $ zip branches
294 $ ([0] ++ (map (\(b,b') ->
295 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
296 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
297 in
298 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
299 then (b' ^. branch_seaLevel) !! (lmin - 1)
300 else (b' ^. branch_seaLevel) !! (idx - 1)
301 ) $ listToSeq branches))
302 in map (\(x,b) -> b & branch_x .~ x)
303 $ zip steps branches
304
305 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
306 branchToIso' start step branches =
307 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
308 $ inits
309 $ ([0] ++ (map (\(b,b') ->
310 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
311 in 1 - start - step * root) $ listToSeq branches))
312 in map (\(x,b) -> b & branch_x .~ x)
313 $ zip bx branches
314
315
316 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
317 sortByHierarchy depth branches =
318 if (length branches == 1)
319 then branches
320 else concat
321 $ map (\branches' ->
322 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
323 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
324 ++ (sortByHierarchy (depth + 1) (snd partitions)))
325 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
326 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
327
328
329 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
330 sortByBirthDate order export =
331 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
332 branches' = case order of
333 Asc -> branches
334 Desc -> reverse branches
335 in export & export_branches .~ branches'
336
337 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
338 processSort sort' elev export = case sort' of
339 ByBirthDate o -> sortByBirthDate o export
340 ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
341 $ sortByHierarchy 0 (export ^. export_branches))
342
343
344 -----------------
345 -- | Metrics | --
346 -----------------
347
348 -- | Return the conditional probability of i knowing j
349 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
350 conditional m i j = (findWithDefault 0 (i,j) m)
351 / (m ! (j,j))
352
353
354 -- | Return the genericity score of a given ngram
355 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
356 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
357 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
358
359
360 -- | Return the specificity score of a given ngram
361 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
362 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
363 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
364
365
366 -- | Return the inclusion score of a given ngram
367 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
368 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
369 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
370
371
372 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
373 ngramsMetrics phylo export =
374 over ( export_groups
375 . traverse )
376 (\g -> g & phylo_groupMeta %~ insert "genericity"
377 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
378 & phylo_groupMeta %~ insert "specificity"
379 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
380 & phylo_groupMeta %~ insert "inclusion"
381 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
382 & phylo_groupMeta %~ insert "frequence"
383 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
384 ) export
385
386
387 branchDating :: PhyloExport -> PhyloExport
388 branchDating export =
389 over ( export_branches
390 . traverse )
391 (\b ->
392 let groups = sortOn fst
393 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
394 then acc ++ [g ^. phylo_groupPeriod]
395 else acc ) [] $ export ^. export_groups
396 periods = nub groups
397 birth = fst $ head' "birth" groups
398 age = (snd $ last' "age" groups) - birth
399 in b & branch_meta %~ insert "birth" [fromIntegral birth]
400 & branch_meta %~ insert "age" [fromIntegral age]
401 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
402
403 processMetrics :: Phylo -> PhyloExport -> PhyloExport
404 processMetrics phylo export = ngramsMetrics phylo
405 $ branchDating export
406
407
408 -----------------
409 -- | Taggers | --
410 -----------------
411
412 nk :: Int -> [[Int]] -> Int
413 nk n groups = sum
414 $ map (\g -> if (elem n g)
415 then 1
416 else 0) groups
417
418
419 tf :: Int -> [[Int]] -> Double
420 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
421
422
423 idf :: Int -> [[Int]] -> Double
424 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
425
426
427 findTfIdf :: [[Int]] -> [(Int,Double)]
428 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
429
430
431 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
432 findEmergences groups freq =
433 let ngrams = map _phylo_groupNgrams groups
434 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
435 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
436 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
437 in reverse $ sortOn snd
438 $ map (\(n,_) -> if (member n freq)
439 then (n,freq ! n)
440 else (n,0)) emerging
441
442
443 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
444 mostEmergentTfIdf nth freq foundations export =
445 over ( export_branches
446 . traverse )
447 (\b ->
448 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
449 tfidf = findTfIdf (map _phylo_groupNgrams groups)
450 emergences = findEmergences groups freq
451 selected = if (null emergences)
452 then map fst $ take nth tfidf
453 else [fst $ head' "mostEmergentTfIdf" emergences]
454 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
455 in b & branch_label .~ (ngramsToLabel foundations selected)) export
456
457
458 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
459 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
460 $ take nth
461 $ reverse
462 $ sortOn snd $ zip [0..] meta
463
464
465 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
466 mostInclusive nth foundations export =
467 over ( export_branches
468 . traverse )
469 (\b ->
470 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
471 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
472 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
473 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
474 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
475 in b & branch_label .~ lbl ) export
476
477
478 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
479 mostEmergentInclusive nth foundations export =
480 over ( export_groups
481 . traverse )
482 (\g ->
483 let lbl = ngramsToLabel foundations
484 $ take nth
485 $ map (\(_,(_,idx)) -> idx)
486 $ concat
487 $ map (\groups -> sortOn (fst . snd) groups)
488 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
489 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
490 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
491 in g & phylo_groupLabel .~ lbl ) export
492
493
494 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
495 processLabels labels foundations freq export =
496 foldl (\export' label ->
497 case label of
498 GroupLabel tagger nth ->
499 case tagger of
500 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
501 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
502 BranchLabel tagger nth ->
503 case tagger of
504 MostInclusive -> mostInclusive nth foundations export'
505 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
506 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
507
508
509 ------------------
510 -- | Dynamics | --
511 ------------------
512
513
514 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
515 toDynamics n parents g m =
516 let prd = g ^. phylo_groupPeriod
517 end = last' "dynamics" (sort $ map snd $ elems m)
518 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
519 {- decrease -}
520 then 2
521 else if ((fst prd) == (fst $ m ! n))
522 {- emerging -}
523 then 0
524 else if isNew
525 {- emergence -}
526 then 1
527 else 3
528 where
529 --------------------------------------
530 isNew :: Bool
531 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
532
533
534 processDynamics :: [PhyloGroup] -> [PhyloGroup]
535 processDynamics groups =
536 map (\g ->
537 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
538 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
539 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
540 where
541 --------------------------------------
542 mapNgrams :: Map Int (Date,Date)
543 mapNgrams = map (\dates ->
544 let dates' = sort dates
545 in (head' "dynamics" dates', last' "dynamics" dates'))
546 $ fromListWith (++)
547 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
548 $ (g ^. phylo_groupNgrams))) [] groups
549
550
551 -----------------
552 -- | horizon | --
553 -----------------
554
555 getGroupThr :: Double -> PhyloGroup -> Double
556 getGroupThr step g =
557 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
558 breaks = (g ^. phylo_groupMeta) ! "breaks"
559 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
560
561 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
562 toAncestor nbDocs diago proximity step candidates ego =
563 let curr = ego ^. phylo_groupAncestors
564 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
565 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
566 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
567 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
568
569
570 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
571 headsToAncestors nbDocs diago proximity step heads acc =
572 if (null heads)
573 then acc
574 else
575 let ego = head' "headsToAncestors" heads
576 heads' = tail' "headsToAncestors" heads
577 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
578
579
580 toHorizon :: Phylo -> Phylo
581 toHorizon phylo =
582 let phyloAncestor = updatePhyloGroups
583 level
584 (fromList $ map (\g -> (getGroupId g, g))
585 $ concat
586 $ tracePhyloAncestors newGroups) phylo
587 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
588 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
589 in updatePhyloGroups level reBranched phylo
590 where
591 -- | 1) for each periods
592 periods :: [PhyloPeriodId]
593 periods = getPeriodIds phylo
594 -- --
595 level :: Level
596 level = getLastLevel phylo
597 -- --
598 frame :: Int
599 frame = getTimeFrame $ timeUnit $ getConfig phylo
600 -- | 2) find ancestors between groups without parents
601 mapGroups :: [[PhyloGroup]]
602 mapGroups = map (\prd ->
603 let groups = getGroupsFromLevelPeriods level [prd] phylo
604 childs = getPreviousChildIds level frame prd periods phylo
605 -- maybe add a better filter for non isolated ancestors
606 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
607 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
608 noHeads = groups \\ heads
609 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
610 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
611 proximity = (phyloProximity $ getConfig phylo)
612 step = case getSeaElevation phylo of
613 Constante _ s -> s
614 Adaptative _ -> undefined
615 -- in headsToAncestors nbDocs diago proximity heads groups []
616 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
617 $ headsToAncestors nbDocs diago proximity step heads []
618 ) periods
619 -- | 3) process this task concurrently
620 newGroups :: [[PhyloGroup]]
621 newGroups = mapGroups `using` parList rdeepseq
622 --------------------------------------
623
624 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
625 getPreviousChildIds lvl frame curr prds phylo =
626 concat $ map ((map fst) . _phylo_groupPeriodChilds)
627 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
628
629 ---------------------
630 -- | phyloExport | --
631 ---------------------
632
633 toPhyloExport :: Phylo -> DotGraph DotId
634 toPhyloExport phylo = exportToDot phylo
635 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
636 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
637 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
638 $ processMetrics phylo export
639 where
640 export :: PhyloExport
641 export = PhyloExport groups branches
642 --------------------------------------
643 branches :: [PhyloBranch]
644 branches = map (\g ->
645 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
646 breaks = (g ^. phylo_groupMeta) ! "breaks"
647 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
648 in PhyloBranch (g ^. phylo_groupBranchId)
649 canonId
650 seaLvl
651 0
652 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
653 0
654 0
655 "" empty)
656 $ map (\gs -> head' "export" gs)
657 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
658 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
659 --------------------------------------
660 groups :: [PhyloGroup]
661 groups = traceExportGroups
662 $ processDynamics
663 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
664 $ tracePhyloInfo
665 $ toHorizon phylo
666
667
668 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
669 traceExportBranches branches = trace ("\n"
670 <> "-- | Export " <> show(length branches) <> " branches") branches
671
672 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
673 tracePhyloAncestors groups = trace ("\n"
674 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
675 ) groups
676
677 tracePhyloInfo :: Phylo -> Phylo
678 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
679 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
680 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
681 ) phylo
682
683
684 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
685 traceExportGroups groups = trace ("\n" <> "-- | Export "
686 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
687 <> show(length groups) <> " groups and "
688 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
689 ) groups
690