]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/PhyloExport.hs
merge done
[gargantext.git] / src / Gargantext / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Viz.Phylo.PhyloExport where
14
15 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
16 import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
17 import Data.Vector (Vector)
18
19 import Prelude (writeFile)
20 import Gargantext.Prelude
21 import Gargantext.Viz.AdaptativePhylo
22 import Gargantext.Viz.Phylo.PhyloTools
23 import Gargantext.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
24
25 import Control.Lens hiding (Level)
26 import Control.Parallel.Strategies (parList, rdeepseq, using)
27 import Data.GraphViz hiding (DotGraph, Order)
28 import Data.GraphViz.Types.Generalised (DotGraph)
29 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
30 import Data.GraphViz.Types.Monadic
31 import Data.Text.Lazy (fromStrict, pack, unpack)
32 import System.FilePath
33 import Debug.Trace (trace)
34
35 import qualified Data.Text as Text
36 import qualified Data.Vector as Vector
37 import qualified Data.Text.Lazy as Lazy
38 import qualified Data.GraphViz.Attributes.HTML as H
39
40 --------------------
41 -- | Dot export | --
42 --------------------
43
44 dotToFile :: FilePath -> DotGraph DotId -> IO ()
45 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
46
47 dotToString :: DotGraph DotId -> [Char]
48 dotToString dotG = unpack (printDotGraph dotG)
49
50 dynamicToColor :: Double -> H.Attribute
51 dynamicToColor d
52 | d == 0 = H.BGColor (toColor LightCoral)
53 | d == 1 = H.BGColor (toColor Khaki)
54 | d == 2 = H.BGColor (toColor SkyBlue)
55 | otherwise = H.Color (toColor Black)
56
57 pickLabelColor :: [Double] -> H.Attribute
58 pickLabelColor lst
59 | elem 0 lst = dynamicToColor 0
60 | elem 2 lst = dynamicToColor 2
61 | elem 1 lst = dynamicToColor 1
62 | otherwise = dynamicToColor 3
63
64 toDotLabel :: Text.Text -> Label
65 toDotLabel lbl = StrLabel $ fromStrict lbl
66
67 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
68 toAttr k v = customAttribute k v
69
70 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
71 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
72
73 groupIdToDotId :: PhyloGroupId -> DotId
74 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
75
76 branchIdToDotId :: PhyloBranchId -> DotId
77 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
78
79 periodIdToDotId :: PhyloPeriodId -> DotId
80 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
81
82 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
83 groupToTable fdt g = H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
92 where
93 --------------------------------------
94 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
95 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
96 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
97 --------------------------------------
98 header :: H.Row
99 header =
100 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
101 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
102 <> (fromStrict " ( ")
103 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
104 <> (fromStrict " , ")
105 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
106 <> (fromStrict " ) ")
107 <> (pack $ show (getGroupId g)))]]
108 --------------------------------------
109
110 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
111 branchToDotNode b bId =
112 node (branchIdToDotId $ b ^. branch_id)
113 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
114 <> (metaToAttr $ b ^. branch_meta)
115 <> [ toAttr "nodeType" "branch"
116 , toAttr "bId" (pack $ show bId)
117 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
118 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
119 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
120 , toAttr "label" (pack $ show $ b ^. branch_label)
121 ])
122
123 periodToDotNode :: (Date,Date) -> Dot DotId
124 periodToDotNode prd =
125 node (periodIdToDotId prd)
126 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
127 <> [ toAttr "nodeType" "period"
128 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
129 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
130
131
132 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
133 groupToDotNode fdt g bId =
134 node (groupIdToDotId $ getGroupId g)
135 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
136 <> [ toAttr "nodeType" "group"
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
140 , toAttr "bId" (pack $ show bId)
141 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))])
142
143
144 toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
145 toDotEdge source target lbl edgeType = edge source target
146 (case edgeType of
147 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
148 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
149 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
150 , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
151 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
152 , Label (StrLabel $ fromStrict lbl)]
153 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
154 , Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
155 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
156
157
158 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
159 mergePointers groups =
160 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
161 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
162 in unionWith (\w w' -> max w w') toChilds toParents
163
164 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
165 mergeAncestors groups = concat
166 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
167 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
168
169
170 toBid :: PhyloGroup -> [PhyloBranch] -> Int
171 toBid g bs =
172 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
173 in fromJust $ elemIndex b' bs
174
175 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
176 exportToDot phylo export =
177 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
178 <> show(length $ export ^. export_groups) <> " groups "
179 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
180 <> "##########################") $
181 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
182
183 {- 1) init the dot graph -}
184 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
185 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
186 , Ratio FillRatio
187 , Style [SItem Filled []],Color [toWColor White]]
188 {-- home made attributes -}
189 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
190 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
191 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
192 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
193 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
194 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
195 ])
196
197 {-
198 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
199
200 -- 2) create a layer for the branches labels -}
201 subgraph (Str "Branches peaks") $ do
202
203 graphAttrs [Rank SameRank]
204 {-
205 -- 3) group the branches by hierarchy
206 -- mapM (\branches ->
207 -- subgraph (Str "Branches clade") $ do
208 -- graphAttrs [Rank SameRank]
209
210 -- -- 4) create a node for each branch
211 -- mapM branchToDotNode branches
212 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
213 -}
214 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
215
216 {-- 5) create a layer for each period -}
217 _ <- mapM (\period ->
218 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
219 graphAttrs [Rank SameRank]
220 periodToDotNode period
221
222 {-- 6) create a node for each group -}
223 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
224 ) $ getPeriodIds phylo
225
226 {-- 7) create the edges between a branch and its first groups -}
227 _ <- mapM (\(bId,groups) ->
228 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
229 )
230 $ toList
231 $ map (\groups -> head' "toDot"
232 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
233 $ sortOn (fst . _phylo_groupPeriod) groups)
234 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
235
236 {- 8) create the edges between the groups -}
237 _ <- mapM (\((k,k'),_) ->
238 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
239 ) $ (toList . mergePointers) $ export ^. export_groups
240
241 _ <- mapM (\((k,k'),_) ->
242 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
243 ) $ mergeAncestors $ export ^. export_groups
244
245 -- | 10) create the edges between the periods
246 _ <- mapM (\(prd,prd') ->
247 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
248 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
249
250 {- 8) create the edges between the branches
251 -- _ <- mapM (\(bId,bId') ->
252 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
253 -- (Text.pack $ show(branchIdsToProximity bId bId'
254 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
255 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
256 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
257 -}
258
259
260 graphAttrs [Rank SameRank]
261
262
263 ----------------
264 -- | Filter | --
265 ----------------
266
267 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
268 filterByBranchSize thr export =
269 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
270 in export & export_branches .~ (fst splited)
271 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
272
273
274 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
275 processFilters filters qua export =
276 foldl (\export' f -> case f of
277 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
278 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
279 else filterByBranchSize thr export'
280 ) export filters
281
282 --------------
283 -- | Sort | --
284 --------------
285
286 branchToIso :: [PhyloBranch] -> [PhyloBranch]
287 branchToIso branches =
288 let steps = map sum
289 $ inits
290 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
291 $ zip branches
292 $ ([0] ++ (map (\(b,b') ->
293 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
294 in (b' ^. branch_seaLevel) !! (idx - 1)
295 ) $ listToSeq branches))
296 in map (\(x,b) -> b & branch_x .~ x)
297 $ zip steps branches
298
299
300 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
301 sortByHierarchy depth branches =
302 if (length branches == 1)
303 then branchToIso branches
304 else branchToIso $ concat
305 $ map (\branches' ->
306 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
307 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
308 ++ (sortByHierarchy (depth + 1) (snd partitions)))
309 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
310 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
311
312
313 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
314 sortByBirthDate order export =
315 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
316 branches' = case order of
317 Asc -> branches
318 Desc -> reverse branches
319 in export & export_branches .~ branches'
320
321 processSort :: Sort -> PhyloExport -> PhyloExport
322 processSort sort' export = case sort' of
323 ByBirthDate o -> sortByBirthDate o export
324 ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
325
326
327 -----------------
328 -- | Metrics | --
329 -----------------
330
331 -- | Return the conditional probability of i knowing j
332 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
333 conditional m i j = (findWithDefault 0 (i,j) m)
334 / (m ! (j,j))
335
336
337 -- | Return the genericity score of a given ngram
338 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
339 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
340 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
341
342
343 -- | Return the specificity score of a given ngram
344 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
345 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
346 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
347
348
349 -- | Return the inclusion score of a given ngram
350 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
351 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
352 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
353
354
355 ngramsMetrics :: PhyloExport -> PhyloExport
356 ngramsMetrics export =
357 over ( export_groups
358 . traverse )
359 (\g -> g & phylo_groupMeta %~ insert "genericity"
360 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
361 & phylo_groupMeta %~ insert "specificity"
362 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
363 & phylo_groupMeta %~ insert "inclusion"
364 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
365 ) export
366
367
368 branchDating :: PhyloExport -> PhyloExport
369 branchDating export =
370 over ( export_branches
371 . traverse )
372 (\b ->
373 let groups = sortOn fst
374 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
375 then acc ++ [g ^. phylo_groupPeriod]
376 else acc ) [] $ export ^. export_groups
377 periods = nub groups
378 birth = fst $ head' "birth" groups
379 age = (snd $ last' "age" groups) - birth
380 in b & branch_meta %~ insert "birth" [fromIntegral birth]
381 & branch_meta %~ insert "age" [fromIntegral age]
382 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
383
384 processMetrics :: PhyloExport -> PhyloExport
385 processMetrics export = ngramsMetrics
386 $ branchDating export
387
388
389 -----------------
390 -- | Taggers | --
391 -----------------
392
393 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
394 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
395 $ take nth
396 $ reverse
397 $ sortOn snd $ zip [0..] meta
398
399
400 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
401 mostInclusive nth foundations export =
402 over ( export_branches
403 . traverse )
404 (\b ->
405 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
406 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
407 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
408 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
409 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
410 in b & branch_label .~ lbl ) export
411
412
413 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
414 mostEmergentInclusive nth foundations export =
415 over ( export_groups
416 . traverse )
417 (\g ->
418 let lbl = ngramsToLabel foundations
419 $ take nth
420 $ map (\(_,(_,idx)) -> idx)
421 $ concat
422 $ map (\groups -> sortOn (fst . snd) groups)
423 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
424 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
425 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
426 in g & phylo_groupLabel .~ lbl ) export
427
428
429 processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
430 processLabels labels foundations export =
431 foldl (\export' label ->
432 case label of
433 GroupLabel tagger nth ->
434 case tagger of
435 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
436 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
437 BranchLabel tagger nth ->
438 case tagger of
439 MostInclusive -> mostInclusive nth foundations export'
440 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
441
442
443 ------------------
444 -- | Dynamics | --
445 ------------------
446
447
448 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
449 toDynamics n parents g m =
450 let prd = g ^. phylo_groupPeriod
451 end = last' "dynamics" (sort $ map snd $ elems m)
452 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
453 {- decrease -}
454 then 2
455 else if ((fst prd) == (fst $ m ! n))
456 {- recombination -}
457 then 0
458 else if isNew
459 {- emergence -}
460 then 1
461 else 3
462 where
463 --------------------------------------
464 isNew :: Bool
465 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
466
467
468 processDynamics :: [PhyloGroup] -> [PhyloGroup]
469 processDynamics groups =
470 map (\g ->
471 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
472 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
473 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
474 where
475 --------------------------------------
476 mapNgrams :: Map Int (Date,Date)
477 mapNgrams = map (\dates ->
478 let dates' = sort dates
479 in (head' "dynamics" dates', last' "dynamics" dates'))
480 $ fromListWith (++)
481 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
482 $ (g ^. phylo_groupNgrams))) [] groups
483
484
485 -----------------
486 -- | horizon | --
487 -----------------
488
489 getGroupThr :: Double -> PhyloGroup -> Double
490 getGroupThr step g =
491 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
492 breaks = (g ^. phylo_groupMeta) ! "breaks"
493 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
494
495 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
496 toAncestor nbDocs diago proximity step candidates ego =
497 let curr = ego ^. phylo_groupAncestors
498 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
499 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
500 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
501 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
502
503
504 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
505 headsToAncestors nbDocs diago proximity step heads acc =
506 if (null heads)
507 then acc
508 else
509 let ego = head' "headsToAncestors" heads
510 heads' = tail' "headsToAncestors" heads
511 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
512
513
514 toHorizon :: Phylo -> Phylo
515 toHorizon phylo =
516 let phyloAncestor = updatePhyloGroups
517 level
518 (fromList $ map (\g -> (getGroupId g, g))
519 $ concat
520 $ tracePhyloAncestors newGroups) phylo
521 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
522 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
523 in updatePhyloGroups level reBranched phylo
524 where
525 -- | 1) for each periods
526 periods :: [PhyloPeriodId]
527 periods = getPeriodIds phylo
528 -- --
529 level :: Level
530 level = getLastLevel phylo
531 -- --
532 frame :: Int
533 frame = getTimeFrame $ timeUnit $ getConfig phylo
534 -- | 2) find ancestors between groups without parents
535 mapGroups :: [[PhyloGroup]]
536 mapGroups = map (\prd ->
537 let groups = getGroupsFromLevelPeriods level [prd] phylo
538 childs = getPreviousChildIds level frame prd periods phylo
539 heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
540 noHeads = groups \\ heads
541 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
542 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
543 proximity = (phyloProximity $ getConfig phylo)
544 step = case getSeaElevation phylo of
545 Constante _ s -> s
546 Adaptative _ -> undefined
547 -- in headsToAncestors nbDocs diago proximity heads groups []
548 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
549 $ headsToAncestors nbDocs diago proximity step heads []
550 ) periods
551 -- | 3) process this task concurrently
552 newGroups :: [[PhyloGroup]]
553 newGroups = mapGroups `using` parList rdeepseq
554 --------------------------------------
555
556 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
557 getPreviousChildIds lvl frame curr prds phylo =
558 concat $ map ((map fst) . _phylo_groupPeriodChilds)
559 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
560
561 ---------------------
562 -- | phyloExport | --
563 ---------------------
564
565 toPhyloExport :: Phylo -> DotGraph DotId
566 toPhyloExport phylo = exportToDot phylo
567 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
568 $ processSort (exportSort $ getConfig phylo)
569 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
570 $ processMetrics export
571 where
572 export :: PhyloExport
573 export = PhyloExport groups branches
574 --------------------------------------
575 branches :: [PhyloBranch]
576 branches = map (\g ->
577 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
578 breaks = (g ^. phylo_groupMeta) ! "breaks"
579 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
580 in PhyloBranch (g ^. phylo_groupBranchId)
581 canonId
582 seaLvl
583 0
584 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
585 0
586 0
587 "" empty)
588 $ map (\gs -> head' "export" gs)
589 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
590 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
591 --------------------------------------
592 groups :: [PhyloGroup]
593 groups = traceExportGroups
594 $ processDynamics
595 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
596 $ tracePhyloInfo
597 $ toHorizon phylo
598
599
600 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
601 traceExportBranches branches = trace ("\n"
602 <> "-- | Export " <> show(length branches) <> " branches") branches
603
604 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
605 tracePhyloAncestors groups = trace ("\n"
606 <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
607 ) groups
608
609 tracePhyloInfo :: Phylo -> Phylo
610 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
611 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
612 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
613 ) phylo
614
615
616 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
617 traceExportGroups groups = trace ("\n" <> "-- | Export "
618 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
619 <> show(length groups) <> " groups and "
620 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
621 ) groups
622