]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[DEPS] stack upgrade
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
16 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
18
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Core.Viz.AdaptativePhylo
22 import Gargantext.Core.Viz.Phylo.PhyloTools
23 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
24
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
34
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
39
40 --------------------
41 -- | Dot export | --
42 --------------------
43
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
46
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
49
50 dynamicToColor :: Double -> H.Attribute
51 dynamicToColor d
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
56
57 pickLabelColor :: [Double] -> H.Attribute
58 pickLabelColor lst
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
63
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
66
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
69
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
72
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
75
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
78
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
81
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
92 where
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
98 header :: H.Row
99 header =
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
109
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
121 ])
122
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130
131
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
142 , toAttr "label" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
143 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
144 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
145 ])
146
147
148 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
149 toDotEdge source target lbl edgeType = edge source target
150 (case edgeType of
151 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
152 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
153 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
154 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
155 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
156 , Label (StrLabel $ fromStrict lbl)]
157 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
158 , Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
159 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
160
161
162 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
163 mergePointers groups =
164 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
165 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
166 in unionWith (\w w' -> max w w') toChilds toParents
167
168 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
169 mergeAncestors groups = concat
170 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
171 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
172
173
174 toBid :: PhyloGroup -> [PhyloBranch] -> Int
175 toBid g bs =
176 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
177 in fromJust $ elemIndex b' bs
178
179 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
180 exportToDot phylo export =
181 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
182 <> show(length $ export ^. export_groups) <> " groups "
183 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
184 <> "##########################") $
185 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
186
187 {- 1) init the dot graph -}
188 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
189 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
190 , Ratio FillRatio
191 , Style [SItem Filled []],Color [toWColor White]]
192 {-- home made attributes -}
193 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
194 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
195 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
196 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
197 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
198 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
199 ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
200 ])
201
202 {-
203 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
204
205 -- 2) create a layer for the branches labels -}
206 subgraph (Str "Branches peaks") $ do
207
208 graphAttrs [Rank SameRank]
209 {-
210 -- 3) group the branches by hierarchy
211 -- mapM (\branches ->
212 -- subgraph (Str "Branches clade") $ do
213 -- graphAttrs [Rank SameRank]
214
215 -- -- 4) create a node for each branch
216 -- mapM branchToDotNode branches
217 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
218 -}
219 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
220
221 {-- 5) create a layer for each period -}
222 _ <- mapM (\period ->
223 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
224 graphAttrs [Rank SameRank]
225 periodToDotNode period
226
227 {-- 6) create a node for each group -}
228 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
229 ) $ getPeriodIds phylo
230
231 {-- 7) create the edges between a branch and its first groups -}
232 _ <- mapM (\(bId,groups) ->
233 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
234 )
235 $ toList
236 $ map (\groups -> head' "toDot"
237 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
238 $ sortOn (fst . _phylo_groupPeriod) groups)
239 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
240
241 {- 8) create the edges between the groups -}
242 _ <- mapM (\((k,k'),_) ->
243 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
244 ) $ (toList . mergePointers) $ export ^. export_groups
245
246 _ <- mapM (\((k,k'),_) ->
247 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
248 ) $ mergeAncestors $ export ^. export_groups
249
250 -- 10) create the edges between the periods
251 _ <- mapM (\(prd,prd') ->
252 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
253 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
254
255 {- 8) create the edges between the branches
256 -- _ <- mapM (\(bId,bId') ->
257 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
258 -- (Text.pack $ show(branchIdsToProximity bId bId'
259 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
260 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
261 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
262 -}
263
264
265 graphAttrs [Rank SameRank]
266
267
268 ----------------
269 -- | Filter | --
270 ----------------
271
272 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
273 filterByBranchSize thr export =
274 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
275 in export & export_branches .~ (fst splited)
276 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
277
278
279 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
280 processFilters filters qua export =
281 foldl (\export' f -> case f of
282 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
283 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
284 else filterByBranchSize thr export'
285 ) export filters
286
287 --------------
288 -- | Sort | --
289 --------------
290
291 branchToIso :: [PhyloBranch] -> [PhyloBranch]
292 branchToIso branches =
293 let steps = map sum
294 $ inits
295 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
296 $ zip branches
297 $ ([0] ++ (map (\(b,b') ->
298 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
299 in (b' ^. branch_seaLevel) !! (idx - 1)
300 ) $ listToSeq branches))
301 in map (\(x,b) -> b & branch_x .~ x)
302 $ zip steps branches
303
304
305 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
306 sortByHierarchy depth branches =
307 if (length branches == 1)
308 then branchToIso branches
309 else branchToIso $ concat
310 $ map (\branches' ->
311 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
312 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
313 ++ (sortByHierarchy (depth + 1) (snd partitions)))
314 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
315 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
316
317
318 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
319 sortByBirthDate order export =
320 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
321 branches' = case order of
322 Asc -> branches
323 Desc -> reverse branches
324 in export & export_branches .~ branches'
325
326 processSort :: Sort -> PhyloExport -> PhyloExport
327 processSort sort' export = case sort' of
328 ByBirthDate o -> sortByBirthDate o export
329 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
330
331
332 -----------------
333 -- | Metrics | --
334 -----------------
335
336 -- | Return the conditional probability of i knowing j
337 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
338 conditional m i j = (findWithDefault 0 (i,j) m)
339 / (m ! (j,j))
340
341
342 -- | Return the genericity score of a given ngram
343 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
344 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
345 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
346
347
348 -- | Return the specificity score of a given ngram
349 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
350 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
351 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
352
353
354 -- | Return the inclusion score of a given ngram
355 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
356 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
357 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
358
359
360 ngramsMetrics :: PhyloExport -> PhyloExport
361 ngramsMetrics export =
362 over ( export_groups
363 . traverse )
364 (\g -> g & phylo_groupMeta %~ insert "genericity"
365 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
366 & phylo_groupMeta %~ insert "specificity"
367 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
368 & phylo_groupMeta %~ insert "inclusion"
369 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
370 ) export
371
372
373 branchDating :: PhyloExport -> PhyloExport
374 branchDating export =
375 over ( export_branches
376 . traverse )
377 (\b ->
378 let groups = sortOn fst
379 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
380 then acc ++ [g ^. phylo_groupPeriod]
381 else acc ) [] $ export ^. export_groups
382 periods = nub groups
383 birth = fst $ head' "birth" groups
384 age = (snd $ last' "age" groups) - birth
385 in b & branch_meta %~ insert "birth" [fromIntegral birth]
386 & branch_meta %~ insert "age" [fromIntegral age]
387 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
388
389 processMetrics :: PhyloExport -> PhyloExport
390 processMetrics export = ngramsMetrics
391 $ branchDating export
392
393
394 -----------------
395 -- | Taggers | --
396 -----------------
397
398 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
399 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
400 $ take nth
401 $ reverse
402 $ sortOn snd $ zip [0..] meta
403
404
405 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
406 mostInclusive nth foundations export =
407 over ( export_branches
408 . traverse )
409 (\b ->
410 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
411 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
412 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
413 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
414 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
415 in b & branch_label .~ lbl ) export
416
417
418 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
419 mostEmergentInclusive nth foundations export =
420 over ( export_groups
421 . traverse )
422 (\g ->
423 let lbl = ngramsToLabel foundations
424 $ take nth
425 $ map (\(_,(_,idx)) -> idx)
426 $ concat
427 $ map (\groups -> sortOn (fst . snd) groups)
428 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
429 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
430 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
431 in g & phylo_groupLabel .~ lbl ) export
432
433
434 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
435 processLabels labels foundations export =
436 foldl (\export' label ->
437 case label of
438 GroupLabel tagger nth ->
439 case tagger of
440 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
441 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
442 BranchLabel tagger nth ->
443 case tagger of
444 MostInclusive -> mostInclusive nth foundations export'
445 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
446
447
448 ------------------
449 -- | Dynamics | --
450 ------------------
451
452
453 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
454 toDynamics n parents g m =
455 let prd = g ^. phylo_groupPeriod
456 end = last' "dynamics" (sort $ map snd $ elems m)
457 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
458 {- decrease -}
459 then 2
460 else if ((fst prd) == (fst $ m ! n))
461 {- recombination -}
462 then 0
463 else if isNew
464 {- emergence -}
465 then 1
466 else 3
467 where
468 --------------------------------------
469 isNew :: Bool
470 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
471
472
473 processDynamics :: [PhyloGroup] -> [PhyloGroup]
474 processDynamics groups =
475 map (\g ->
476 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
477 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
478 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
479 where
480 --------------------------------------
481 mapNgrams :: Map Int (Date,Date)
482 mapNgrams = map (\dates ->
483 let dates' = sort dates
484 in (head' "dynamics" dates', last' "dynamics" dates'))
485 $ fromListWith (++)
486 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
487 $ (g ^. phylo_groupNgrams))) [] groups
488
489
490 -----------------
491 -- | horizon | --
492 -----------------
493
494 getGroupThr :: Double -> PhyloGroup -> Double
495 getGroupThr step g =
496 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
497 breaks = (g ^. phylo_groupMeta) ! "breaks"
498 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
499
500 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
501 toAncestor nbDocs diago proximity step candidates ego =
502 let curr = ego ^. phylo_groupAncestors
503 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
504 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
505 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
506 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
507
508
509 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
510 headsToAncestors nbDocs diago proximity step heads acc =
511 if (null heads)
512 then acc
513 else
514 let ego = head' "headsToAncestors" heads
515 heads' = tail' "headsToAncestors" heads
516 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
517
518
519 toHorizon :: Phylo -> Phylo
520 toHorizon phylo =
521 let phyloAncestor = updatePhyloGroups
522 level
523 (fromList $ map (\g -> (getGroupId g, g))
524 $ concat
525 $ tracePhyloAncestors newGroups) phylo
526 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
527 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
528 in updatePhyloGroups level reBranched phylo
529 where
530 -- | 1) for each periods
531 periods :: [PhyloPeriodId]
532 periods = getPeriodIds phylo
533 -- --
534 level :: Level
535 level = getLastLevel phylo
536 -- --
537 frame :: Int
538 frame = getTimeFrame $ timeUnit $ getConfig phylo
539 -- | 2) find ancestors between groups without parents
540 mapGroups :: [[PhyloGroup]]
541 mapGroups = map (\prd ->
542 let groups = getGroupsFromLevelPeriods level [prd] phylo
543 childs = getPreviousChildIds level frame prd periods phylo
544 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
545 noHeads = groups \\ heads
546 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
547 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
548 proximity = (phyloProximity $ getConfig phylo)
549 step = case getSeaElevation phylo of
550 Constante _ s -> s
551 Adaptative _ -> undefined
552 -- in headsToAncestors nbDocs diago proximity heads groups []
553 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
554 $ headsToAncestors nbDocs diago proximity step heads []
555 ) periods
556 -- | 3) process this task concurrently
557 newGroups :: [[PhyloGroup]]
558 newGroups = mapGroups `using` parList rdeepseq
559 --------------------------------------
560
561 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
562 getPreviousChildIds lvl frame curr prds phylo =
563 concat $ map ((map fst) . _phylo_groupPeriodChilds)
564 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
565
566 ---------------------
567 -- | phyloExport | --
568 ---------------------
569
570 toPhyloExport :: Phylo -> DotGraph DotId
571 toPhyloExport phylo = exportToDot phylo
572 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
573 $ processSort (exportSort $ getConfig phylo)
574 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
575 $ processMetrics export
576 where
577 export :: PhyloExport
578 export = PhyloExport groups branches
579 --------------------------------------
580 branches :: [PhyloBranch]
581 branches = map (\g ->
582 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
583 breaks = (g ^. phylo_groupMeta) ! "breaks"
584 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
585 in PhyloBranch (g ^. phylo_groupBranchId)
586 canonId
587 seaLvl
588 0
589 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
590 0
591 0
592 "" empty)
593 $ map (\gs -> head' "export" gs)
594 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
595 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
596 --------------------------------------
597 groups :: [PhyloGroup]
598 groups = traceExportGroups
599 $ processDynamics
600 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
601 $ tracePhyloInfo
602 $ toHorizon phylo
603
604
605 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
606 traceExportBranches branches = trace ("\n"
607 <> "-- | Export " <> show(length branches) <> " branches") branches
608
609 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
610 tracePhyloAncestors groups = trace ("\n"
611 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
612 ) groups
613
614 tracePhyloInfo :: Phylo -> Phylo
615 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
616 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
617 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
618 ) phylo
619
620
621 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
622 traceExportGroups groups = trace ("\n" <> "-- | Export "
623 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
624 <> show(length groups) <> " groups and "
625 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
626 ) groups
627