]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
fix the shape
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
36
37 --------------------
38 -- | Dot export | --
39 --------------------
40
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
43
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
46
47 dynamicToColor :: Int -> H.Attribute
48 dynamicToColor d
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
53
54 pickLabelColor :: [Int] -> H.Attribute
55 pickLabelColor lst
56 | elem 0 lst = dynamicToColor 0
57 | elem 1 lst = dynamicToColor 1
58 | elem 2 lst = dynamicToColor 2
59 | otherwise = dynamicToColor 3
60
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
63
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
66
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
69
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d, d'), lvl), idx) =
72 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
73
74 branchIdToDotId :: PhyloBranchId -> DotId
75 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
76
77 periodIdToDotId :: Period -> DotId
78 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
79
80 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
81 branchToDotNode b bId =
82 node (branchIdToDotId $ b ^. branch_id)
83 ( [ FillColor [toWColor CornSilk]
84 , FontName "Arial"
85 , FontSize 40
86 , Shape Egg
87 , Style [SItem Bold []] ]
88 <> (metaToAttr $ b ^. branch_meta)
89 <> [ toAttr "nodeType" "branch"
90 , toAttr "bId" (pack $ show bId)
91 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
92 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
93 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
94 , toAttr "label" (pack $ show $ b ^. branch_label)
95 ])
96
97 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
98 periodToDotNode prd prd' =
99 node (periodIdToDotId prd) $
100 [ Shape BoxShape
101 , FontSize 50
102 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
103 <> [ toAttr "nodeType" "period"
104 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
105 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
106 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
107 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
108
109
110 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
111 groupToDotNode fdt g bId =
112 node (groupIdToDotId $ getGroupId g)
113 ([Shape Square]
114 <> [ toAttr "nodeType" "group"
115 , toAttr "gid" (groupIdToDotId $ getGroupId g)
116 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
117 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
118 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
119 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
120 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
121 , toAttr "bId" (pack $ show bId)
122 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
123 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
124 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
125 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
126 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
127 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
128 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
129 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
130 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
131 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
132 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
133 ])
134
135 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
136 toDotEdge' source target thr w edgeType = edge source target
137 (case edgeType of
138 GroupToGroup -> undefined
139 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
140 BranchToGroup -> undefined
141 BranchToBranch -> undefined
142 GroupToAncestor -> undefined
143 PeriodToPeriod -> undefined)
144
145
146 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
147 toDotEdge source target lbl edgeType = edge source target
148 (case edgeType of
149 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
150 GroupToGroupMemory -> undefined
151 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
152 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
153 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
154 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
155
156
157 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
158 mergePointers groups =
159 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
160 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
161 in unionWith (\w w' -> max w w') toChilds toParents
162
163 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
164 mergePointersMemory groups =
165 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
166 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
167 in concat [toChilds,toParents]
168
169 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
170 mergeAncestors groups = concat
171 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
172 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
173
174
175 toBid :: PhyloGroup -> [PhyloBranch] -> Int
176 toBid g bs =
177 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
178 in fromJust $ elemIndex b' bs
179
180 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
181 exportToDot phylo export =
182 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
183 <> show(length $ export ^. export_groups) <> " groups "
184 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
185 <> "##########################") $
186 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
187
188 {- 1) init the dot graph -}
189 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
190 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
191 , Ratio FillRatio
192 -- , Ratio AutoRatio
193 , Style [SItem Filled []],Color [toWColor White]]
194 {-- home made attributes -}
195 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
196 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
197 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
198 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
199 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
200 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
201 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
202 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
203 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
204 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
205 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
206 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
207 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
208 ])
209
210 {-
211 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
212
213 -- 2) create a layer for the branches labels -}
214 subgraph (Str "Branches peaks") $ do
215
216 -- graphAttrs [Rank SameRank]
217 {-
218 -- 3) group the branches by hierarchy
219 -- mapM (\branches ->
220 -- subgraph (Str "Branches clade") $ do
221 -- graphAttrs [Rank SameRank]
222
223 -- -- 4) create a node for each branch
224 -- mapM branchToDotNode branches
225 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
226 -}
227 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
228
229 {-- 5) create a layer for each period -}
230 _ <- mapM (\period ->
231 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
232 graphAttrs [Rank SameRank]
233 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
234
235 {-- 6) create a node for each group -}
236 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
237 ) $ phylo ^. phylo_periods
238
239 {-- 7) create the edges between a branch and its first groups -}
240 _ <- mapM (\(bId,groups) ->
241 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
242 )
243 $ toList
244 $ map (\groups -> head' "toDot"
245 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
246 $ sortOn (fst . _phylo_groupPeriod) groups)
247 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
248
249 {- 8) create the edges between the groups -}
250 _ <- mapM (\((k,k'),v) ->
251 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
252 ) $ (toList . mergePointers) $ export ^. export_groups
253
254 {- 8-bis) create the edges between the groups -}
255 {- _ <- mapM (\((k,k'),v) ->
256 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
257 ) $ mergePointersMemory $ export ^. export_groups -}
258
259 _ <- mapM (\((k,k'),v) ->
260 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
261 ) $ mergeAncestors $ export ^. export_groups
262
263 -- 10) create the edges between the periods
264 _ <- mapM (\(prd,prd') ->
265 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
266 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
267
268 {- 8) create the edges between the branches
269 -- _ <- mapM (\(bId,bId') ->
270 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
271 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
272 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
273 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
274 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
275 -}
276
277
278 graphAttrs [Rank SameRank]
279
280
281 ----------------
282 -- | Filter | --
283 ----------------
284
285 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
286 filterByBranchSize thr export =
287 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
288 in export & export_branches .~ (fst splited)
289 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
290
291
292 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
293 processFilters filters qua export =
294 foldl (\export' f -> case f of
295 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
296 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
297 else filterByBranchSize thr export'
298 ) export filters
299
300 --------------
301 -- | Sort | --
302 --------------
303
304 branchToIso :: [PhyloBranch] -> [PhyloBranch]
305 branchToIso branches =
306 let steps = map sum
307 $ inits
308 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
309 $ zip branches
310 $ ([0] ++ (map (\(b,b') ->
311 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
312 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
313 in
314 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
315 then (b' ^. branch_seaLevel) !! (lmin - 1)
316 else (b' ^. branch_seaLevel) !! (idx - 1)
317 ) $ listToSeq branches))
318 in map (\(x,b) -> b & branch_x .~ x)
319 $ zip steps branches
320
321 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
322 branchToIso' start step branches =
323 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
324 $ inits
325 $ ([0] ++ (map (\(b,b') ->
326 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
327 in 1 - start - step * root) $ listToSeq branches))
328 in map (\(x,b) -> b & branch_x .~ x)
329 $ zip bx branches
330
331
332 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
333 sortByHierarchy depth branches =
334 if (length branches == 1)
335 then branches
336 else concat
337 $ map (\branches' ->
338 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
339 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
340 ++ (sortByHierarchy (depth + 1) (snd partitions)))
341 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
342 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
343
344
345 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
346 sortByBirthDate order export =
347 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
348 branches' = case order of
349 Asc -> branches
350 Desc -> reverse branches
351 in export & export_branches .~ branches'
352
353 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
354 processSort sort' elev export = case sort' of
355 ByBirthDate o -> sortByBirthDate o export
356 ByHierarchy _ -> case elev of
357 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
358 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
359 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
360
361 -----------------
362 -- | Metrics | --
363 -----------------
364
365 -- | Return the conditional probability of i knowing j
366 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
367 conditional m i j = (findWithDefault 0 (i,j) m)
368 / (m ! (j,j))
369
370
371 -- | Return the genericity score of a given ngram
372 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
373 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
374 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
375
376
377 -- | Return the specificity score of a given ngram
378 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
379 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
380 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
381
382
383 -- | Return the inclusion score of a given ngram
384 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
385 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
386 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
387
388
389 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
390 ngramsMetrics phylo export =
391 over ( export_groups
392 . traverse )
393 (\g -> g & phylo_groupMeta %~ insert "genericity"
394 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
395 & phylo_groupMeta %~ insert "specificity"
396 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
397 & phylo_groupMeta %~ insert "inclusion"
398 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
399 & phylo_groupMeta %~ insert "frequence"
400 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
401 ) export
402
403
404 branchDating :: PhyloExport -> PhyloExport
405 branchDating export =
406 over ( export_branches
407 . traverse )
408 (\b ->
409 let groups = sortOn fst
410 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
411 then acc ++ [g ^. phylo_groupPeriod]
412 else acc ) [] $ export ^. export_groups
413 periods = nub groups
414 birth = fst $ head' "birth" groups
415 death = snd $ last' "death" groups
416 age = death - birth
417 in b & branch_meta %~ insert "birth" [fromIntegral birth]
418 & branch_meta %~ insert "death" [fromIntegral death]
419 & branch_meta %~ insert "age" [fromIntegral age]
420 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
421
422 processMetrics :: Phylo -> PhyloExport -> PhyloExport
423 processMetrics phylo export = ngramsMetrics phylo
424 $ branchDating export
425
426
427 -----------------
428 -- | Taggers | --
429 -----------------
430
431 nk :: Int -> [[Int]] -> Int
432 nk n groups = sum
433 $ map (\g -> if (elem n g)
434 then 1
435 else 0) groups
436
437
438 tf :: Int -> [[Int]] -> Double
439 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
440
441
442 idf :: Int -> [[Int]] -> Double
443 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
444
445
446 findTfIdf :: [[Int]] -> [(Int,Double)]
447 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
448
449
450 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
451 findEmergences groups freq =
452 let ngrams = map _phylo_groupNgrams groups
453 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
454 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
455 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
456 in reverse $ sortOn snd
457 $ map (\(n,_) -> if (member n freq)
458 then (n,freq ! n)
459 else (n,0)) emerging
460
461
462 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
463 mostEmergentTfIdf nth freq foundations export =
464 over ( export_branches
465 . traverse )
466 (\b ->
467 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
468 tfidf = findTfIdf (map _phylo_groupNgrams groups)
469 emergences = findEmergences groups freq
470 selected = if (null emergences)
471 then map fst $ take nth tfidf
472 else [fst $ head' "mostEmergentTfIdf" emergences]
473 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
474 in b & branch_label .~ (ngramsToLabel foundations selected)) export
475
476
477 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
478 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
479 $ take nth
480 $ reverse
481 $ sortOn snd $ zip [0..] meta
482
483
484 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
485 mostInclusive nth foundations export =
486 over ( export_branches
487 . traverse )
488 (\b ->
489 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
490 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
491 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
492 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
493 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
494 in b & branch_label .~ lbl ) export
495
496
497 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
498 mostEmergentInclusive nth foundations export =
499 over ( export_groups
500 . traverse )
501 (\g ->
502 let lbl = ngramsToLabel foundations
503 $ take nth
504 $ map (\(_,(_,idx)) -> idx)
505 $ concat
506 $ map (\groups -> sortOn (fst . snd) groups)
507 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
508 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
509 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
510 in g & phylo_groupLabel .~ lbl ) export
511
512
513 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
514 processLabels labels foundations freq export =
515 foldl (\export' label ->
516 case label of
517 GroupLabel tagger nth ->
518 case tagger of
519 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
520 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
521 BranchLabel tagger nth ->
522 case tagger of
523 MostInclusive -> mostInclusive nth foundations export'
524 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
525 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
526
527
528 ------------------
529 -- | Dynamics | --
530 ------------------
531
532 -- utiliser & creer une Map FdtId [PhyloGroup]
533 -- n = index of the current term
534 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
535 toDynamics n elders g m =
536 let prd = g ^. phylo_groupPeriod
537 end = last' "dynamics" (sort $ map snd $ elems m)
538 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
539 {- decrease -}
540 then 2
541 else if ((fst prd) == (fst $ m ! n))
542 {- emerging -}
543 then 0
544 else if isNew
545 {- emergence -}
546 then 1
547 else 3
548 where
549 --------------------------------------
550 isNew :: Bool
551 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
552
553 type FdtId = Int
554 processDynamics :: [PhyloGroup] -> [PhyloGroup]
555 processDynamics groups =
556 map (\g ->
557 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
558 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
559 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
560 where
561 --------------------------------------
562 mapNgrams :: Map FdtId (Date,Date)
563 mapNgrams = map (\dates ->
564 let dates' = sort dates
565 in (head' "dynamics" dates', last' "dynamics" dates'))
566 $ fromListWith (++)
567 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
568 $ (g ^. phylo_groupNgrams))) [] groups
569
570
571 -----------------
572 -- | horizon | --
573 -----------------
574
575 getGroupThr :: Double -> PhyloGroup -> Double
576 getGroupThr step g =
577 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
578 breaks = (g ^. phylo_groupMeta) ! "breaks"
579 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
580
581 toAncestor :: Double
582 -> Map Int Double
583 -> PhyloSimilarity
584 -> Double
585 -> [PhyloGroup]
586 -> PhyloGroup
587 -> PhyloGroup
588 toAncestor nbDocs diago similarity step candidates ego =
589 let curr = ego ^. phylo_groupAncestors
590 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
591 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
592 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
593 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
594
595
596 headsToAncestors :: Double
597 -> Map Int Double
598 -> PhyloSimilarity
599 -> Double
600 -> [PhyloGroup]
601 -> [PhyloGroup]
602 -> [PhyloGroup]
603 headsToAncestors nbDocs diago similarity step heads acc =
604 if (null heads)
605 then acc
606 else
607 let ego = head' "headsToAncestors" heads
608 heads' = tail' "headsToAncestors" heads
609 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
610
611
612 toHorizon :: Phylo -> Phylo
613 toHorizon phylo =
614 let phyloAncestor = updatePhyloGroups
615 scale
616 (fromList $ map (\g -> (getGroupId g, g))
617 $ concat
618 $ tracePhyloAncestors newGroups) phylo
619 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
620 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
621 in updatePhyloGroups scale reBranched phylo
622 where
623 -- | 1) for each periods
624 periods :: [Period]
625 periods = getPeriodIds phylo
626 -- --
627 scale :: Scale
628 scale = getLastLevel phylo
629 -- --
630 frame :: Int
631 frame = getTimeFrame $ timeUnit $ getConfig phylo
632 -- | 2) find ancestors between groups without parents
633 mapGroups :: [[PhyloGroup]]
634 mapGroups = map (\prd ->
635 let groups = getGroupsFromScalePeriods scale [prd] phylo
636 childs = getPreviousChildIds scale frame prd periods phylo
637 -- maybe add a better filter for non isolated ancestors
638 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
639 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
640 noHeads = groups \\ heads
641 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
642 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
643 sim = (similarity $ getConfig phylo)
644 step = case getSeaElevation phylo of
645 Constante _ s -> s
646 Adaptative _ -> 0
647 Evolving _ -> 0
648 -- in headsToAncestors nbDocs diago Similarity heads groups []
649 in map (toAncestor nbDocs diago sim step noHeads)
650 $ headsToAncestors nbDocs diago sim step heads []
651 ) periods
652 -- | 3) process this task concurrently
653 newGroups :: [[PhyloGroup]]
654 newGroups = mapGroups `using` parList rdeepseq
655 --------------------------------------
656
657 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
658 getPreviousChildIds lvl frame curr prds phylo =
659 concat $ map ((map fst) . _phylo_groupPeriodChilds)
660 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
661
662 ---------------------
663 -- | phyloExport | --
664 ---------------------
665
666 toPhyloExport :: Phylo -> DotGraph DotId
667 toPhyloExport phylo = exportToDot phylo
668 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
669 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
670 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
671 $ processMetrics phylo export
672 where
673 export :: PhyloExport
674 export = PhyloExport groups branches
675 --------------------------------------
676 branches :: [PhyloBranch]
677 branches = map (\g ->
678 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
679 breaks = (g ^. phylo_groupMeta) ! "breaks"
680 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
681 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
682 , _branch_canonId = canonId
683 , _branch_seaLevel = seaLvl
684 , _branch_x = 0
685 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
686 , _branch_w = 0
687 , _branch_t = 0
688 , _branch_label = ""
689 , _branch_meta = empty })
690 $ map (head' "export")
691 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
692 $ sortOn (^. phylo_groupBranchId) groups
693 --------------------------------------
694 groups :: [PhyloGroup]
695 groups = traceExportGroups
696 -- necessaire ?
697 $ processDynamics
698 $ getGroupsFromScale (phyloScale $ getConfig phylo)
699 $ tracePhyloInfo phylo
700
701
702 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
703 traceExportBranches branches = trace ("\n"
704 <> "-- | Export " <> show(length branches) <> " branches") branches
705
706 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
707 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
708
709 tracePhyloInfo :: Phylo -> Phylo
710 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
711 <> show(getLevel phylo) <> " applied to "
712 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
713 ) phylo
714
715
716 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
717 traceExportGroups groups = trace ("\n" <> "-- | Export "
718 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
719 <> show(length groups) <> " groups and "
720 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
721 ) groups