2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
47 dynamicToColor :: Int -> H.Attribute
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
54 pickLabelColor :: [Int] -> H.Attribute
56 | elem 0 lst = dynamicToColor 0
57 | elem 1 lst = dynamicToColor 1
58 | elem 2 lst = dynamicToColor 2
59 | otherwise = dynamicToColor 3
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d, d'), lvl), idx) =
72 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
74 branchIdToDotId :: PhyloBranchId -> DotId
75 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
77 periodIdToDotId :: Period -> DotId
78 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
80 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
81 branchToDotNode b bId =
82 node (branchIdToDotId $ b ^. branch_id)
83 ( [ FillColor [toWColor CornSilk]
87 , Style [SItem Bold []] ]
88 <> (metaToAttr $ b ^. branch_meta)
89 <> [ toAttr "nodeType" "branch"
90 , toAttr "bId" (pack $ show bId)
91 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
92 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
93 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
94 , toAttr "label" (pack $ show $ b ^. branch_label)
97 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
98 periodToDotNode prd prd' =
99 node (periodIdToDotId prd) $
102 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
103 <> [ toAttr "nodeType" "period"
104 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
105 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
106 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
107 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
110 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
111 groupToDotNode fdt g bId =
112 node (groupIdToDotId $ getGroupId g)
114 <> [ toAttr "nodeType" "group"
115 , toAttr "gid" (groupIdToDotId $ getGroupId g)
116 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
117 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
118 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
119 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
120 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
121 , toAttr "bId" (pack $ show bId)
122 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
123 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
124 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
125 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
126 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
127 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
128 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
129 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
130 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
131 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
132 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
135 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
136 toDotEdge' source target thr w edgeType = edge source target
138 GroupToGroup -> undefined
139 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
140 BranchToGroup -> undefined
141 BranchToBranch -> undefined
142 GroupToAncestor -> undefined
143 PeriodToPeriod -> undefined)
146 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
147 toDotEdge source target lbl edgeType = edge source target
149 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
150 GroupToGroupMemory -> undefined
151 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
152 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
153 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
154 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
157 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
158 mergePointers groups =
159 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
160 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
161 in unionWith (\w w' -> max w w') toChilds toParents
163 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
164 mergePointersMemory groups =
165 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
166 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
167 in concat [toChilds,toParents]
169 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
170 mergeAncestors groups = concat
171 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
172 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
175 toBid :: PhyloGroup -> [PhyloBranch] -> Int
177 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
178 in fromJust $ elemIndex b' bs
180 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
181 exportToDot phylo export =
182 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
183 <> show(length $ export ^. export_groups) <> " groups "
184 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
185 <> "##########################") $
186 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
188 {- 1) init the dot graph -}
189 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
190 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
193 , Style [SItem Filled []],Color [toWColor White]]
194 {-- home made attributes -}
195 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
196 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
197 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
198 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
199 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
200 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
201 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
202 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
203 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
204 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
205 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
206 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
207 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
211 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
213 -- 2) create a layer for the branches labels -}
214 subgraph (Str "Branches peaks") $ do
216 -- graphAttrs [Rank SameRank]
218 -- 3) group the branches by hierarchy
219 -- mapM (\branches ->
220 -- subgraph (Str "Branches clade") $ do
221 -- graphAttrs [Rank SameRank]
223 -- -- 4) create a node for each branch
224 -- mapM branchToDotNode branches
225 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
227 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
229 {-- 5) create a layer for each period -}
230 _ <- mapM (\period ->
231 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
232 graphAttrs [Rank SameRank]
233 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
235 {-- 6) create a node for each group -}
236 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
237 ) $ phylo ^. phylo_periods
239 {-- 7) create the edges between a branch and its first groups -}
240 _ <- mapM (\(bId,groups) ->
241 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
244 $ map (\groups -> head' "toDot"
245 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
246 $ sortOn (fst . _phylo_groupPeriod) groups)
247 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
249 {- 8) create the edges between the groups -}
250 _ <- mapM (\((k,k'),v) ->
251 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
252 ) $ (toList . mergePointers) $ export ^. export_groups
254 {- 8-bis) create the edges between the groups -}
255 {- _ <- mapM (\((k,k'),v) ->
256 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
257 ) $ mergePointersMemory $ export ^. export_groups -}
259 _ <- mapM (\((k,k'),v) ->
260 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
261 ) $ mergeAncestors $ export ^. export_groups
263 -- 10) create the edges between the periods
264 _ <- mapM (\(prd,prd') ->
265 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
266 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
268 {- 8) create the edges between the branches
269 -- _ <- mapM (\(bId,bId') ->
270 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
271 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
272 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
273 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
274 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
278 graphAttrs [Rank SameRank]
285 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
286 filterByBranchSize thr export =
287 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
288 in export & export_branches .~ (fst splited)
289 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
292 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
293 processFilters filters qua export =
294 foldl (\export' f -> case f of
295 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
296 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
297 else filterByBranchSize thr export'
304 branchToIso :: [PhyloBranch] -> [PhyloBranch]
305 branchToIso branches =
308 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
310 $ ([0] ++ (map (\(b,b') ->
311 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
312 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
314 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
315 then (b' ^. branch_seaLevel) !! (lmin - 1)
316 else (b' ^. branch_seaLevel) !! (idx - 1)
317 ) $ listToSeq branches))
318 in map (\(x,b) -> b & branch_x .~ x)
321 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
322 branchToIso' start step branches =
323 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
325 $ ([0] ++ (map (\(b,b') ->
326 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
327 in 1 - start - step * root) $ listToSeq branches))
328 in map (\(x,b) -> b & branch_x .~ x)
332 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
333 sortByHierarchy depth branches =
334 if (length branches == 1)
338 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
339 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
340 ++ (sortByHierarchy (depth + 1) (snd partitions)))
341 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
342 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
345 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
346 sortByBirthDate order export =
347 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
348 branches' = case order of
350 Desc -> reverse branches
351 in export & export_branches .~ branches'
353 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
354 processSort sort' elev export = case sort' of
355 ByBirthDate o -> sortByBirthDate o export
356 ByHierarchy _ -> case elev of
357 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
358 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
359 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
365 -- | Return the conditional probability of i knowing j
366 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
367 conditional m i j = (findWithDefault 0 (i,j) m)
371 -- | Return the genericity score of a given ngram
372 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
373 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
374 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
377 -- | Return the specificity score of a given ngram
378 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
379 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
380 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
383 -- | Return the inclusion score of a given ngram
384 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
385 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
386 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
389 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
390 ngramsMetrics phylo export =
393 (\g -> g & phylo_groupMeta %~ insert "genericity"
394 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
395 & phylo_groupMeta %~ insert "specificity"
396 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
397 & phylo_groupMeta %~ insert "inclusion"
398 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
399 & phylo_groupMeta %~ insert "frequence"
400 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
404 branchDating :: PhyloExport -> PhyloExport
405 branchDating export =
406 over ( export_branches
409 let groups = sortOn fst
410 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
411 then acc ++ [g ^. phylo_groupPeriod]
412 else acc ) [] $ export ^. export_groups
414 birth = fst $ head' "birth" groups
415 death = snd $ last' "death" groups
417 in b & branch_meta %~ insert "birth" [fromIntegral birth]
418 & branch_meta %~ insert "death" [fromIntegral death]
419 & branch_meta %~ insert "age" [fromIntegral age]
420 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
422 processMetrics :: Phylo -> PhyloExport -> PhyloExport
423 processMetrics phylo export = ngramsMetrics phylo
424 $ branchDating export
431 nk :: Int -> [[Int]] -> Int
433 $ map (\g -> if (elem n g)
438 tf :: Int -> [[Int]] -> Double
439 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
442 idf :: Int -> [[Int]] -> Double
443 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
446 findTfIdf :: [[Int]] -> [(Int,Double)]
447 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
450 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
451 findEmergences groups freq =
452 let ngrams = map _phylo_groupNgrams groups
453 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
454 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
455 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
456 in reverse $ sortOn snd
457 $ map (\(n,_) -> if (member n freq)
462 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
463 mostEmergentTfIdf nth freq foundations export =
464 over ( export_branches
467 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
468 tfidf = findTfIdf (map _phylo_groupNgrams groups)
469 emergences = findEmergences groups freq
470 selected = if (null emergences)
471 then map fst $ take nth tfidf
472 else [fst $ head' "mostEmergentTfIdf" emergences]
473 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
474 in b & branch_label .~ (ngramsToLabel foundations selected)) export
477 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
478 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
481 $ sortOn snd $ zip [0..] meta
484 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
485 mostInclusive nth foundations export =
486 over ( export_branches
489 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
490 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
491 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
492 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
493 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
494 in b & branch_label .~ lbl ) export
497 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
498 mostEmergentInclusive nth foundations export =
502 let lbl = ngramsToLabel foundations
504 $ map (\(_,(_,idx)) -> idx)
506 $ map (\groups -> sortOn (fst . snd) groups)
507 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
508 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
509 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
510 in g & phylo_groupLabel .~ lbl ) export
513 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
514 processLabels labels foundations freq export =
515 foldl (\export' label ->
517 GroupLabel tagger nth ->
519 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
520 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
521 BranchLabel tagger nth ->
523 MostInclusive -> mostInclusive nth foundations export'
524 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
525 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
532 -- utiliser & creer une Map FdtId [PhyloGroup]
533 -- n = index of the current term
534 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
535 toDynamics n elders g m =
536 let prd = g ^. phylo_groupPeriod
537 end = last' "dynamics" (sort $ map snd $ elems m)
538 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
541 else if ((fst prd) == (fst $ m ! n))
549 --------------------------------------
551 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
554 processDynamics :: [PhyloGroup] -> [PhyloGroup]
555 processDynamics groups =
557 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
558 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
559 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
561 --------------------------------------
562 mapNgrams :: Map FdtId (Date,Date)
563 mapNgrams = map (\dates ->
564 let dates' = sort dates
565 in (head' "dynamics" dates', last' "dynamics" dates'))
567 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
568 $ (g ^. phylo_groupNgrams))) [] groups
575 getGroupThr :: Double -> PhyloGroup -> Double
577 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
578 breaks = (g ^. phylo_groupMeta) ! "breaks"
579 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
588 toAncestor nbDocs diago similarity step candidates ego =
589 let curr = ego ^. phylo_groupAncestors
590 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
591 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
592 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
593 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
596 headsToAncestors :: Double
603 headsToAncestors nbDocs diago similarity step heads acc =
607 let ego = head' "headsToAncestors" heads
608 heads' = tail' "headsToAncestors" heads
609 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
612 toHorizon :: Phylo -> Phylo
614 let phyloAncestor = updatePhyloGroups
616 (fromList $ map (\g -> (getGroupId g, g))
618 $ tracePhyloAncestors newGroups) phylo
619 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
620 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
621 in updatePhyloGroups scale reBranched phylo
623 -- | 1) for each periods
625 periods = getPeriodIds phylo
628 scale = getLastLevel phylo
631 frame = getTimeFrame $ timeUnit $ getConfig phylo
632 -- | 2) find ancestors between groups without parents
633 mapGroups :: [[PhyloGroup]]
634 mapGroups = map (\prd ->
635 let groups = getGroupsFromScalePeriods scale [prd] phylo
636 childs = getPreviousChildIds scale frame prd periods phylo
637 -- maybe add a better filter for non isolated ancestors
638 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
639 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
640 noHeads = groups \\ heads
641 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
642 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
643 sim = (similarity $ getConfig phylo)
644 step = case getSeaElevation phylo of
648 -- in headsToAncestors nbDocs diago Similarity heads groups []
649 in map (toAncestor nbDocs diago sim step noHeads)
650 $ headsToAncestors nbDocs diago sim step heads []
652 -- | 3) process this task concurrently
653 newGroups :: [[PhyloGroup]]
654 newGroups = mapGroups `using` parList rdeepseq
655 --------------------------------------
657 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
658 getPreviousChildIds lvl frame curr prds phylo =
659 concat $ map ((map fst) . _phylo_groupPeriodChilds)
660 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
662 ---------------------
663 -- | phyloExport | --
664 ---------------------
666 toPhyloExport :: Phylo -> DotGraph DotId
667 toPhyloExport phylo = exportToDot phylo
668 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
669 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
670 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
671 $ processMetrics phylo export
673 export :: PhyloExport
674 export = PhyloExport groups branches
675 --------------------------------------
676 branches :: [PhyloBranch]
677 branches = map (\g ->
678 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
679 breaks = (g ^. phylo_groupMeta) ! "breaks"
680 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
681 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
682 , _branch_canonId = canonId
683 , _branch_seaLevel = seaLvl
685 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
689 , _branch_meta = empty })
690 $ map (head' "export")
691 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
692 $ sortOn (^. phylo_groupBranchId) groups
693 --------------------------------------
694 groups :: [PhyloGroup]
695 groups = traceExportGroups
698 $ getGroupsFromScale (phyloScale $ getConfig phylo)
699 $ tracePhyloInfo phylo
702 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
703 traceExportBranches branches = trace ("\n"
704 <> "-- | Export " <> show(length branches) <> " branches") branches
706 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
707 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
709 tracePhyloInfo :: Phylo -> Phylo
710 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
711 <> show(getLevel phylo) <> " applied to "
712 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
716 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
717 traceExportGroups groups = trace ("\n" <> "-- | Export "
718 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
719 <> show(length groups) <> " groups and "
720 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"