2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
29 import Gargantext.Prelude
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
47 dynamicToColor :: Double -> H.Attribute
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
54 pickLabelColor :: [Double] -> H.Attribute
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
76 periodIdToDotId :: PhyloPeriodId -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
148 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
149 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
150 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
151 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
155 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge' source target thr w edgeType = edge source target
158 GroupToGroup -> undefined
159 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
160 BranchToGroup -> undefined
161 BranchToBranch -> undefined
162 GroupToAncestor -> undefined
163 PeriodToPeriod -> undefined)
166 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
167 toDotEdge source target lbl edgeType = edge source target
169 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
170 GroupToGroupMemory -> undefined
171 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
172 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
173 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
174 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
177 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
178 mergePointers groups =
179 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
180 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
181 in unionWith (\w w' -> max w w') toChilds toParents
183 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
184 mergePointersMemory groups =
185 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
186 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
187 in concat [toChilds,toParents]
189 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
190 mergeAncestors groups = concat
191 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
192 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
195 toBid :: PhyloGroup -> [PhyloBranch] -> Int
197 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
198 in fromJust $ elemIndex b' bs
200 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
201 exportToDot phylo export =
202 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
203 <> show(length $ export ^. export_groups) <> " groups "
204 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
205 <> "##########################") $
206 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
208 {- 1) init the dot graph -}
209 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
210 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
213 , Style [SItem Filled []],Color [toWColor White]]
214 {-- home made attributes -}
215 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
216 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
217 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
218 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
219 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
220 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
221 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
222 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
223 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
227 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
229 -- 2) create a layer for the branches labels -}
230 subgraph (Str "Branches peaks") $ do
232 -- graphAttrs [Rank SameRank]
234 -- 3) group the branches by hierarchy
235 -- mapM (\branches ->
236 -- subgraph (Str "Branches clade") $ do
237 -- graphAttrs [Rank SameRank]
239 -- -- 4) create a node for each branch
240 -- mapM branchToDotNode branches
241 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
243 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
245 {-- 5) create a layer for each period -}
246 _ <- mapM (\period ->
247 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
248 graphAttrs [Rank SameRank]
249 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
251 {-- 6) create a node for each group -}
252 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
253 ) $ phylo ^. phylo_periods
255 {-- 7) create the edges between a branch and its first groups -}
256 _ <- mapM (\(bId,groups) ->
257 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
260 $ map (\groups -> head' "toDot"
261 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
262 $ sortOn (fst . _phylo_groupPeriod) groups)
263 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
265 {- 8) create the edges between the groups -}
266 _ <- mapM (\((k,k'),v) ->
267 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
268 ) $ (toList . mergePointers) $ export ^. export_groups
270 {- 8-bis) create the edges between the groups -}
271 {- _ <- mapM (\((k,k'),v) ->
272 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
273 ) $ mergePointersMemory $ export ^. export_groups -}
275 _ <- mapM (\((k,k'),v) ->
276 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
277 ) $ mergeAncestors $ export ^. export_groups
279 -- 10) create the edges between the periods
280 _ <- mapM (\(prd,prd') ->
281 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
282 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
284 {- 8) create the edges between the branches
285 -- _ <- mapM (\(bId,bId') ->
286 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
287 -- (Text.pack $ show(branchIdsToProximity bId bId'
288 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
289 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
290 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
294 graphAttrs [Rank SameRank]
301 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
302 filterByBranchSize thr export =
303 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
304 in export & export_branches .~ (fst splited)
305 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
308 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
309 processFilters filters qua export =
310 foldl (\export' f -> case f of
311 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
312 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
313 else filterByBranchSize thr export'
320 branchToIso :: [PhyloBranch] -> [PhyloBranch]
321 branchToIso branches =
324 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
326 $ ([0] ++ (map (\(b,b') ->
327 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
328 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
330 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
331 then (b' ^. branch_seaLevel) !! (lmin - 1)
332 else (b' ^. branch_seaLevel) !! (idx - 1)
333 ) $ listToSeq branches))
334 in map (\(x,b) -> b & branch_x .~ x)
337 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
338 branchToIso' start step branches =
339 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
341 $ ([0] ++ (map (\(b,b') ->
342 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
343 in 1 - start - step * root) $ listToSeq branches))
344 in map (\(x,b) -> b & branch_x .~ x)
348 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
349 sortByHierarchy depth branches =
350 if (length branches == 1)
354 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
355 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
356 ++ (sortByHierarchy (depth + 1) (snd partitions)))
357 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
358 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
361 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
362 sortByBirthDate order export =
363 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
364 branches' = case order of
366 Desc -> reverse branches
367 in export & export_branches .~ branches'
369 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
370 processSort sort' elev export = case sort' of
371 ByBirthDate o -> sortByBirthDate o export
372 ByHierarchy _ -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
373 $ sortByHierarchy 0 (export ^. export_branches))
380 -- | Return the conditional probability of i knowing j
381 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
382 conditional m i j = (findWithDefault 0 (i,j) m)
386 -- | Return the genericity score of a given ngram
387 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
388 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
389 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
392 -- | Return the specificity score of a given ngram
393 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
394 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
395 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
398 -- | Return the inclusion score of a given ngram
399 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
400 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
401 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
404 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
405 ngramsMetrics phylo export =
408 (\g -> g & phylo_groupMeta %~ insert "genericity"
409 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
410 & phylo_groupMeta %~ insert "specificity"
411 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
412 & phylo_groupMeta %~ insert "inclusion"
413 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
414 & phylo_groupMeta %~ insert "frequence"
415 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
419 branchDating :: PhyloExport -> PhyloExport
420 branchDating export =
421 over ( export_branches
424 let groups = sortOn fst
425 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
426 then acc ++ [g ^. phylo_groupPeriod]
427 else acc ) [] $ export ^. export_groups
429 birth = fst $ head' "birth" groups
430 age = (snd $ last' "age" groups) - birth
431 in b & branch_meta %~ insert "birth" [fromIntegral birth]
432 & branch_meta %~ insert "age" [fromIntegral age]
433 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
435 processMetrics :: Phylo -> PhyloExport -> PhyloExport
436 processMetrics phylo export = ngramsMetrics phylo
437 $ branchDating export
444 nk :: Int -> [[Int]] -> Int
446 $ map (\g -> if (elem n g)
451 tf :: Int -> [[Int]] -> Double
452 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
455 idf :: Int -> [[Int]] -> Double
456 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
459 findTfIdf :: [[Int]] -> [(Int,Double)]
460 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
463 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
464 findEmergences groups freq =
465 let ngrams = map _phylo_groupNgrams groups
466 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
467 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
468 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
469 in reverse $ sortOn snd
470 $ map (\(n,_) -> if (member n freq)
475 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
476 mostEmergentTfIdf nth freq foundations export =
477 over ( export_branches
480 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
481 tfidf = findTfIdf (map _phylo_groupNgrams groups)
482 emergences = findEmergences groups freq
483 selected = if (null emergences)
484 then map fst $ take nth tfidf
485 else [fst $ head' "mostEmergentTfIdf" emergences]
486 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
487 in b & branch_label .~ (ngramsToLabel foundations selected)) export
490 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
491 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
494 $ sortOn snd $ zip [0..] meta
497 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
498 mostInclusive nth foundations export =
499 over ( export_branches
502 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
503 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
504 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
505 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
506 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
507 in b & branch_label .~ lbl ) export
510 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
511 mostEmergentInclusive nth foundations export =
515 let lbl = ngramsToLabel foundations
517 $ map (\(_,(_,idx)) -> idx)
519 $ map (\groups -> sortOn (fst . snd) groups)
520 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
521 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
522 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
523 in g & phylo_groupLabel .~ lbl ) export
526 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
527 processLabels labels foundations freq export =
528 foldl (\export' label ->
530 GroupLabel tagger nth ->
532 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
533 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
534 BranchLabel tagger nth ->
536 MostInclusive -> mostInclusive nth foundations export'
537 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
538 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
546 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
547 toDynamics n parents g m =
548 let prd = g ^. phylo_groupPeriod
549 end = last' "dynamics" (sort $ map snd $ elems m)
550 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
553 else if ((fst prd) == (fst $ m ! n))
561 --------------------------------------
563 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
566 processDynamics :: [PhyloGroup] -> [PhyloGroup]
567 processDynamics groups =
569 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
570 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
571 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
573 --------------------------------------
574 mapNgrams :: Map Int (Date,Date)
575 mapNgrams = map (\dates ->
576 let dates' = sort dates
577 in (head' "dynamics" dates', last' "dynamics" dates'))
579 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
580 $ (g ^. phylo_groupNgrams))) [] groups
587 getGroupThr :: Double -> PhyloGroup -> Double
589 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
590 breaks = (g ^. phylo_groupMeta) ! "breaks"
591 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
593 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
594 toAncestor nbDocs diago proximity step candidates ego =
595 let curr = ego ^. phylo_groupAncestors
596 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
597 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
598 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
599 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
602 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
603 headsToAncestors nbDocs diago proximity step heads acc =
607 let ego = head' "headsToAncestors" heads
608 heads' = tail' "headsToAncestors" heads
609 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
612 toHorizon :: Phylo -> Phylo
614 let phyloAncestor = updatePhyloGroups
616 (fromList $ map (\g -> (getGroupId g, g))
618 $ tracePhyloAncestors newGroups) phylo
619 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
620 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
621 in updatePhyloGroups level reBranched phylo
623 -- | 1) for each periods
624 periods :: [PhyloPeriodId]
625 periods = getPeriodIds phylo
628 level = getLastLevel phylo
631 frame = getTimeFrame $ timeUnit $ getConfig phylo
632 -- | 2) find ancestors between groups without parents
633 mapGroups :: [[PhyloGroup]]
634 mapGroups = map (\prd ->
635 let groups = getGroupsFromLevelPeriods level [prd] phylo
636 childs = getPreviousChildIds level frame prd periods phylo
637 -- maybe add a better filter for non isolated ancestors
638 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
639 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
640 noHeads = groups \\ heads
641 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
642 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
643 proximity = (phyloProximity $ getConfig phylo)
644 step = case getSeaElevation phylo of
646 Adaptative _ -> undefined
647 -- in headsToAncestors nbDocs diago proximity heads groups []
648 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
649 $ headsToAncestors nbDocs diago proximity step heads []
651 -- | 3) process this task concurrently
652 newGroups :: [[PhyloGroup]]
653 newGroups = mapGroups `using` parList rdeepseq
654 --------------------------------------
656 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
657 getPreviousChildIds lvl frame curr prds phylo =
658 concat $ map ((map fst) . _phylo_groupPeriodChilds)
659 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
661 ---------------------
662 -- | phyloExport | --
663 ---------------------
665 toPhyloExport :: Phylo -> DotGraph DotId
666 toPhyloExport phylo = exportToDot phylo
667 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
668 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
669 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
670 $ processMetrics phylo export
672 export :: PhyloExport
673 export = PhyloExport groups branches
674 --------------------------------------
675 branches :: [PhyloBranch]
676 branches = map (\g ->
677 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
678 breaks = (g ^. phylo_groupMeta) ! "breaks"
679 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
680 in PhyloBranch (g ^. phylo_groupBranchId)
684 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
688 $ map (\gs -> head' "export" gs)
689 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
690 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
691 --------------------------------------
692 groups :: [PhyloGroup]
693 groups = traceExportGroups
695 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
696 $ tracePhyloInfo phylo
697 -- \$ toHorizon phylo
700 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
701 traceExportBranches branches = trace ("\n"
702 <> "-- | Export " <> show(length branches) <> " branches") branches
704 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
705 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
707 tracePhyloInfo :: Phylo -> Phylo
708 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
709 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
710 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
714 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
715 traceExportGroups groups = trace ("\n" <> "-- | Export "
716 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
717 <> show(length groups) <> " groups and "
718 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"