]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
fix the invalid chart
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
36
37 --------------------
38 -- | Dot export | --
39 --------------------
40
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
43
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
46
47 dynamicToColor :: Int -> H.Attribute
48 dynamicToColor d
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
53
54 pickLabelColor :: [Int] -> H.Attribute
55 pickLabelColor lst
56 | elem 0 lst = dynamicToColor 0
57 | elem 1 lst = dynamicToColor 1
58 | elem 2 lst = dynamicToColor 2
59 | otherwise = dynamicToColor 3
60
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
63
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
66
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
69
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d, d'), lvl), idx) =
72 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
73
74 branchIdToDotId :: PhyloBranchId -> DotId
75 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
76
77 periodIdToDotId :: Period -> DotId
78 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
79
80 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
81 branchToDotNode b bId =
82 node (branchIdToDotId $ b ^. branch_id)
83 ( [ FillColor [toWColor CornSilk]
84 , FontName "Arial"
85 , FontSize 40
86 , Shape Egg
87 , Style [SItem Bold []] ]
88 <> (metaToAttr $ b ^. branch_meta)
89 <> [ toAttr "nodeType" "branch"
90 , toAttr "bId" (pack $ show bId)
91 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
92 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
93 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
94 , toAttr "label" (pack $ show $ b ^. branch_label)
95 ])
96
97 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
98 periodToDotNode prd prd' =
99 node (periodIdToDotId prd) $
100 [ Shape BoxShape
101 , FontSize 50
102 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
103 <> [ toAttr "nodeType" "period"
104 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
105 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
106 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
107 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
108
109
110 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
111 groupToDotNode fdt g bId =
112 node (groupIdToDotId $ getGroupId g)
113 ([ toAttr "nodeType" "group"
114 , toAttr "gid" (groupIdToDotId $ getGroupId g)
115 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
116 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
117 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
118 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
119 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
120 , toAttr "bId" (pack $ show bId)
121 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
122 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
123 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
124 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
125 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
126 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
127 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
128 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
129 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
130 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
131 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
132 ])
133
134 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
135 toDotEdge' source target thr w edgeType = edge source target
136 (case edgeType of
137 GroupToGroup -> undefined
138 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
139 BranchToGroup -> undefined
140 BranchToBranch -> undefined
141 GroupToAncestor -> undefined
142 PeriodToPeriod -> undefined)
143
144
145 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
146 toDotEdge source target lbl edgeType = edge source target
147 (case edgeType of
148 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
149 GroupToGroupMemory -> undefined
150 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
151 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
152 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
153 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
154
155
156 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
157 mergePointers groups =
158 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
159 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
160 in unionWith (\w w' -> max w w') toChilds toParents
161
162 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
163 mergePointersMemory groups =
164 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
165 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
166 in concat [toChilds,toParents]
167
168 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
169 mergeAncestors groups = concat
170 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
171 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
172
173
174 toBid :: PhyloGroup -> [PhyloBranch] -> Int
175 toBid g bs =
176 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
177 in fromJust $ elemIndex b' bs
178
179 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
180 exportToDot phylo export =
181 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
182 <> show(length $ export ^. export_groups) <> " groups "
183 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
184 <> "##########################") $
185 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
186
187 {- 1) init the dot graph -}
188 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
189 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
190 , Ratio FillRatio
191 -- , Ratio AutoRatio
192 , Style [SItem Filled []],Color [toWColor White]]
193 {-- home made attributes -}
194 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
195 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
196 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
197 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
198 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
199 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
200 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
201 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
202 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
203 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
204 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
205 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
206 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
207 ])
208
209 {-
210 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
211
212 -- 2) create a layer for the branches labels -}
213 subgraph (Str "Branches peaks") $ do
214
215 -- graphAttrs [Rank SameRank]
216 {-
217 -- 3) group the branches by hierarchy
218 -- mapM (\branches ->
219 -- subgraph (Str "Branches clade") $ do
220 -- graphAttrs [Rank SameRank]
221
222 -- -- 4) create a node for each branch
223 -- mapM branchToDotNode branches
224 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
225 -}
226 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
227
228 {-- 5) create a layer for each period -}
229 _ <- mapM (\period ->
230 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
231 graphAttrs [Rank SameRank]
232 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
233
234 {-- 6) create a node for each group -}
235 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
236 ) $ phylo ^. phylo_periods
237
238 {-- 7) create the edges between a branch and its first groups -}
239 _ <- mapM (\(bId,groups) ->
240 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
241 )
242 $ toList
243 $ map (\groups -> head' "toDot"
244 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
245 $ sortOn (fst . _phylo_groupPeriod) groups)
246 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
247
248 {- 8) create the edges between the groups -}
249 _ <- mapM (\((k,k'),v) ->
250 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
251 ) $ (toList . mergePointers) $ export ^. export_groups
252
253 {- 8-bis) create the edges between the groups -}
254 {- _ <- mapM (\((k,k'),v) ->
255 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
256 ) $ mergePointersMemory $ export ^. export_groups -}
257
258 _ <- mapM (\((k,k'),v) ->
259 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
260 ) $ mergeAncestors $ export ^. export_groups
261
262 -- 10) create the edges between the periods
263 _ <- mapM (\(prd,prd') ->
264 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
265 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
266
267 {- 8) create the edges between the branches
268 -- _ <- mapM (\(bId,bId') ->
269 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
270 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
271 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
272 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
273 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
274 -}
275
276
277 graphAttrs [Rank SameRank]
278
279
280 ----------------
281 -- | Filter | --
282 ----------------
283
284 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
285 filterByBranchSize thr export =
286 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
287 in export & export_branches .~ (fst splited)
288 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
289
290
291 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
292 processFilters filters qua export =
293 foldl (\export' f -> case f of
294 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
295 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
296 else filterByBranchSize thr export'
297 ) export filters
298
299 --------------
300 -- | Sort | --
301 --------------
302
303 branchToIso :: [PhyloBranch] -> [PhyloBranch]
304 branchToIso branches =
305 let steps = map sum
306 $ inits
307 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
308 $ zip branches
309 $ ([0] ++ (map (\(b,b') ->
310 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
311 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
312 in
313 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
314 then (b' ^. branch_seaLevel) !! (lmin - 1)
315 else (b' ^. branch_seaLevel) !! (idx - 1)
316 ) $ listToSeq branches))
317 in map (\(x,b) -> b & branch_x .~ x)
318 $ zip steps branches
319
320 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
321 branchToIso' start step branches =
322 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
323 $ inits
324 $ ([0] ++ (map (\(b,b') ->
325 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
326 in 1 - start - step * root) $ listToSeq branches))
327 in map (\(x,b) -> b & branch_x .~ x)
328 $ zip bx branches
329
330
331 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
332 sortByHierarchy depth branches =
333 if (length branches == 1)
334 then branches
335 else concat
336 $ map (\branches' ->
337 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
338 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
339 ++ (sortByHierarchy (depth + 1) (snd partitions)))
340 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
341 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
342
343
344 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
345 sortByBirthDate order export =
346 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
347 branches' = case order of
348 Asc -> branches
349 Desc -> reverse branches
350 in export & export_branches .~ branches'
351
352 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
353 processSort sort' elev export = case sort' of
354 ByBirthDate o -> sortByBirthDate o export
355 ByHierarchy _ -> case elev of
356 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
357 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
358 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
359
360 -----------------
361 -- | Metrics | --
362 -----------------
363
364 -- | Return the conditional probability of i knowing j
365 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
366 conditional m i j = (findWithDefault 0 (i,j) m)
367 / (m ! (j,j))
368
369
370 -- | Return the genericity score of a given ngram
371 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
372 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
373 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
374
375
376 -- | Return the specificity score of a given ngram
377 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
378 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
379 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
380
381
382 -- | Return the inclusion score of a given ngram
383 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
384 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
385 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
386
387
388 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
389 ngramsMetrics phylo export =
390 over ( export_groups
391 . traverse )
392 (\g -> g & phylo_groupMeta %~ insert "genericity"
393 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
394 & phylo_groupMeta %~ insert "specificity"
395 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
396 & phylo_groupMeta %~ insert "inclusion"
397 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
398 & phylo_groupMeta %~ insert "frequence"
399 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
400 ) export
401
402
403 branchDating :: PhyloExport -> PhyloExport
404 branchDating export =
405 over ( export_branches
406 . traverse )
407 (\b ->
408 let groups = sortOn fst
409 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
410 then acc ++ [g ^. phylo_groupPeriod]
411 else acc ) [] $ export ^. export_groups
412 periods = nub groups
413 birth = fst $ head' "birth" groups
414 death = snd $ last' "death" groups
415 age = death - birth
416 in b & branch_meta %~ insert "birth" [fromIntegral birth]
417 & branch_meta %~ insert "death" [fromIntegral death]
418 & branch_meta %~ insert "age" [fromIntegral age]
419 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
420
421 processMetrics :: Phylo -> PhyloExport -> PhyloExport
422 processMetrics phylo export = ngramsMetrics phylo
423 $ branchDating export
424
425
426 -----------------
427 -- | Taggers | --
428 -----------------
429
430 nk :: Int -> [[Int]] -> Int
431 nk n groups = sum
432 $ map (\g -> if (elem n g)
433 then 1
434 else 0) groups
435
436
437 tf :: Int -> [[Int]] -> Double
438 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
439
440
441 idf :: Int -> [[Int]] -> Double
442 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
443
444
445 findTfIdf :: [[Int]] -> [(Int,Double)]
446 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
447
448
449 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
450 findEmergences groups freq =
451 let ngrams = map _phylo_groupNgrams groups
452 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
453 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
454 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
455 in reverse $ sortOn snd
456 $ map (\(n,_) -> if (member n freq)
457 then (n,freq ! n)
458 else (n,0)) emerging
459
460
461 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
462 mostEmergentTfIdf nth freq foundations export =
463 over ( export_branches
464 . traverse )
465 (\b ->
466 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
467 tfidf = findTfIdf (map _phylo_groupNgrams groups)
468 emergences = findEmergences groups freq
469 selected = if (null emergences)
470 then map fst $ take nth tfidf
471 else [fst $ head' "mostEmergentTfIdf" emergences]
472 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
473 in b & branch_label .~ (ngramsToLabel foundations selected)) export
474
475
476 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
477 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
478 $ take nth
479 $ reverse
480 $ sortOn snd $ zip [0..] meta
481
482
483 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
484 mostInclusive nth foundations export =
485 over ( export_branches
486 . traverse )
487 (\b ->
488 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
489 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
490 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
491 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
492 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
493 in b & branch_label .~ lbl ) export
494
495
496 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
497 mostEmergentInclusive nth foundations export =
498 over ( export_groups
499 . traverse )
500 (\g ->
501 let lbl = ngramsToLabel foundations
502 $ take nth
503 $ map (\(_,(_,idx)) -> idx)
504 $ concat
505 $ map (\groups -> sortOn (fst . snd) groups)
506 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
507 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
508 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
509 in g & phylo_groupLabel .~ lbl ) export
510
511
512 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
513 processLabels labels foundations freq export =
514 foldl (\export' label ->
515 case label of
516 GroupLabel tagger nth ->
517 case tagger of
518 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
519 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
520 BranchLabel tagger nth ->
521 case tagger of
522 MostInclusive -> mostInclusive nth foundations export'
523 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
524 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
525
526
527 ------------------
528 -- | Dynamics | --
529 ------------------
530
531 -- utiliser & creer une Map FdtId [PhyloGroup]
532 -- n = index of the current term
533 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
534 toDynamics n elders g m =
535 let prd = g ^. phylo_groupPeriod
536 end = last' "dynamics" (sort $ map snd $ elems m)
537 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
538 {- decrease -}
539 then 2
540 else if ((fst prd) == (fst $ m ! n))
541 {- emerging -}
542 then 0
543 else if isNew
544 {- emergence -}
545 then 1
546 else 3
547 where
548 --------------------------------------
549 isNew :: Bool
550 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
551
552 type FdtId = Int
553 processDynamics :: [PhyloGroup] -> [PhyloGroup]
554 processDynamics groups =
555 map (\g ->
556 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
557 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
558 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
559 where
560 --------------------------------------
561 mapNgrams :: Map FdtId (Date,Date)
562 mapNgrams = map (\dates ->
563 let dates' = sort dates
564 in (head' "dynamics" dates', last' "dynamics" dates'))
565 $ fromListWith (++)
566 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
567 $ (g ^. phylo_groupNgrams))) [] groups
568
569
570 -----------------
571 -- | horizon | --
572 -----------------
573
574 getGroupThr :: Double -> PhyloGroup -> Double
575 getGroupThr step g =
576 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
577 breaks = (g ^. phylo_groupMeta) ! "breaks"
578 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
579
580 toAncestor :: Double
581 -> Map Int Double
582 -> PhyloSimilarity
583 -> Double
584 -> [PhyloGroup]
585 -> PhyloGroup
586 -> PhyloGroup
587 toAncestor nbDocs diago similarity step candidates ego =
588 let curr = ego ^. phylo_groupAncestors
589 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
590 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
591 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
592 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
593
594
595 headsToAncestors :: Double
596 -> Map Int Double
597 -> PhyloSimilarity
598 -> Double
599 -> [PhyloGroup]
600 -> [PhyloGroup]
601 -> [PhyloGroup]
602 headsToAncestors nbDocs diago similarity step heads acc =
603 if (null heads)
604 then acc
605 else
606 let ego = head' "headsToAncestors" heads
607 heads' = tail' "headsToAncestors" heads
608 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
609
610
611 toHorizon :: Phylo -> Phylo
612 toHorizon phylo =
613 let phyloAncestor = updatePhyloGroups
614 scale
615 (fromList $ map (\g -> (getGroupId g, g))
616 $ concat
617 $ tracePhyloAncestors newGroups) phylo
618 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
619 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
620 in updatePhyloGroups scale reBranched phylo
621 where
622 -- | 1) for each periods
623 periods :: [Period]
624 periods = getPeriodIds phylo
625 -- --
626 scale :: Scale
627 scale = getLastLevel phylo
628 -- --
629 frame :: Int
630 frame = getTimeFrame $ timeUnit $ getConfig phylo
631 -- | 2) find ancestors between groups without parents
632 mapGroups :: [[PhyloGroup]]
633 mapGroups = map (\prd ->
634 let groups = getGroupsFromScalePeriods scale [prd] phylo
635 childs = getPreviousChildIds scale frame prd periods phylo
636 -- maybe add a better filter for non isolated ancestors
637 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
638 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
639 noHeads = groups \\ heads
640 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
641 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
642 sim = (similarity $ getConfig phylo)
643 step = case getSeaElevation phylo of
644 Constante _ s -> s
645 Adaptative _ -> 0
646 Evolving _ -> 0
647 -- in headsToAncestors nbDocs diago Similarity heads groups []
648 in map (toAncestor nbDocs diago sim step noHeads)
649 $ headsToAncestors nbDocs diago sim step heads []
650 ) periods
651 -- | 3) process this task concurrently
652 newGroups :: [[PhyloGroup]]
653 newGroups = mapGroups `using` parList rdeepseq
654 --------------------------------------
655
656 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
657 getPreviousChildIds lvl frame curr prds phylo =
658 concat $ map ((map fst) . _phylo_groupPeriodChilds)
659 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
660
661 ---------------------
662 -- | phyloExport | --
663 ---------------------
664
665 toPhyloExport :: Phylo -> DotGraph DotId
666 toPhyloExport phylo = exportToDot phylo
667 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
668 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
669 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
670 $ processMetrics phylo export
671 where
672 export :: PhyloExport
673 export = PhyloExport groups branches
674 --------------------------------------
675 branches :: [PhyloBranch]
676 branches = map (\g ->
677 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
678 breaks = (g ^. phylo_groupMeta) ! "breaks"
679 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
680 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
681 , _branch_canonId = canonId
682 , _branch_seaLevel = seaLvl
683 , _branch_x = 0
684 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
685 , _branch_w = 0
686 , _branch_t = 0
687 , _branch_label = ""
688 , _branch_meta = empty })
689 $ map (head' "export")
690 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
691 $ sortOn (^. phylo_groupBranchId) groups
692 --------------------------------------
693 groups :: [PhyloGroup]
694 groups = traceExportGroups
695 -- necessaire ?
696 $ processDynamics
697 $ getGroupsFromScale (phyloScale $ getConfig phylo)
698 $ tracePhyloInfo phylo
699
700
701 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
702 traceExportBranches branches = trace ("\n"
703 <> "-- | Export " <> show(length branches) <> " branches") branches
704
705 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
706 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
707
708 tracePhyloInfo :: Phylo -> Phylo
709 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
710 <> show(getLevel phylo) <> " applied to "
711 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
712 ) phylo
713
714
715 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
716 traceExportGroups groups = trace ("\n" <> "-- | Export "
717 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
718 <> show(length groups) <> " groups and "
719 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
720 ) groups