2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
29 import Gargantext.Prelude
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
47 dynamicToColor :: Double -> H.Attribute
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
54 pickLabelColor :: [Double] -> H.Attribute
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
76 periodIdToDotId :: PhyloPeriodId -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
148 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
149 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
150 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
151 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
155 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge' source target thr w edgeType = edge source target
158 GroupToGroup -> undefined
159 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
160 BranchToGroup -> undefined
161 BranchToBranch -> undefined
162 GroupToAncestor -> undefined
163 PeriodToPeriod -> undefined)
166 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
167 toDotEdge source target lbl edgeType = edge source target
169 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
170 GroupToGroupMemory -> undefined
171 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
172 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
173 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
174 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
177 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
178 mergePointers groups =
179 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
180 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
181 in unionWith (\w w' -> max w w') toChilds toParents
183 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
184 mergePointersMemory groups =
185 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
186 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
187 in concat [toChilds,toParents]
189 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
190 mergeAncestors groups = concat
191 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
192 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
195 toBid :: PhyloGroup -> [PhyloBranch] -> Int
197 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
198 in fromJust $ elemIndex b' bs
200 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
201 exportToDot phylo export =
202 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
203 <> show(length $ export ^. export_groups) <> " groups "
204 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
205 <> "##########################") $
206 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
208 {- 1) init the dot graph -}
209 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
210 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
212 , Style [SItem Filled []],Color [toWColor White]]
213 {-- home made attributes -}
214 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
215 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
216 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
217 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
218 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
219 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
220 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
221 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
222 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
226 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
228 -- 2) create a layer for the branches labels -}
229 subgraph (Str "Branches peaks") $ do
231 -- graphAttrs [Rank SameRank]
233 -- 3) group the branches by hierarchy
234 -- mapM (\branches ->
235 -- subgraph (Str "Branches clade") $ do
236 -- graphAttrs [Rank SameRank]
238 -- -- 4) create a node for each branch
239 -- mapM branchToDotNode branches
240 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
242 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
244 {-- 5) create a layer for each period -}
245 _ <- mapM (\period ->
246 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
247 graphAttrs [Rank SameRank]
248 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
250 {-- 6) create a node for each group -}
251 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
252 ) $ phylo ^. phylo_periods
254 {-- 7) create the edges between a branch and its first groups -}
255 _ <- mapM (\(bId,groups) ->
256 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
259 $ map (\groups -> head' "toDot"
260 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
261 $ sortOn (fst . _phylo_groupPeriod) groups)
262 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
264 {- 8) create the edges between the groups -}
265 _ <- mapM (\((k,k'),v) ->
266 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
267 ) $ (toList . mergePointers) $ export ^. export_groups
269 {- 8-bis) create the edges between the groups -}
270 {- _ <- mapM (\((k,k'),v) ->
271 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
272 ) $ mergePointersMemory $ export ^. export_groups -}
274 _ <- mapM (\((k,k'),v) ->
275 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
276 ) $ mergeAncestors $ export ^. export_groups
278 -- 10) create the edges between the periods
279 _ <- mapM (\(prd,prd') ->
280 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
281 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
283 {- 8) create the edges between the branches
284 -- _ <- mapM (\(bId,bId') ->
285 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
286 -- (Text.pack $ show(branchIdsToProximity bId bId'
287 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
288 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
289 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
293 graphAttrs [Rank SameRank]
300 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
301 filterByBranchSize thr export =
302 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
303 in export & export_branches .~ (fst splited)
304 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
307 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
308 processFilters filters qua export =
309 foldl (\export' f -> case f of
310 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
311 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
312 else filterByBranchSize thr export'
319 branchToIso :: [PhyloBranch] -> [PhyloBranch]
320 branchToIso branches =
323 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
325 $ ([0] ++ (map (\(b,b') ->
326 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
327 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
329 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
330 then (b' ^. branch_seaLevel) !! (lmin - 1)
331 else (b' ^. branch_seaLevel) !! (idx - 1)
332 ) $ listToSeq branches))
333 in map (\(x,b) -> b & branch_x .~ x)
336 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
337 branchToIso' start step branches =
338 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
340 $ ([0] ++ (map (\(b,b') ->
341 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
342 in 1 - start - step * root) $ listToSeq branches))
343 in map (\(x,b) -> b & branch_x .~ x)
347 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
348 sortByHierarchy depth branches =
349 if (length branches == 1)
353 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
354 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
355 ++ (sortByHierarchy (depth + 1) (snd partitions)))
356 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
357 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
360 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
361 sortByBirthDate order export =
362 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
363 branches' = case order of
365 Desc -> reverse branches
366 in export & export_branches .~ branches'
368 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
369 processSort sort' elev export = case sort' of
370 ByBirthDate o -> sortByBirthDate o export
371 ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
372 $ sortByHierarchy 0 (export ^. export_branches))
379 -- | Return the conditional probability of i knowing j
380 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
381 conditional m i j = (findWithDefault 0 (i,j) m)
385 -- | Return the genericity score of a given ngram
386 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
387 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
388 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
391 -- | Return the specificity score of a given ngram
392 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
393 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
394 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
397 -- | Return the inclusion score of a given ngram
398 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
399 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
400 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
403 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
404 ngramsMetrics phylo export =
407 (\g -> g & phylo_groupMeta %~ insert "genericity"
408 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
409 & phylo_groupMeta %~ insert "specificity"
410 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
411 & phylo_groupMeta %~ insert "inclusion"
412 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
413 & phylo_groupMeta %~ insert "frequence"
414 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
418 branchDating :: PhyloExport -> PhyloExport
419 branchDating export =
420 over ( export_branches
423 let groups = sortOn fst
424 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
425 then acc ++ [g ^. phylo_groupPeriod]
426 else acc ) [] $ export ^. export_groups
428 birth = fst $ head' "birth" groups
429 age = (snd $ last' "age" groups) - birth
430 in b & branch_meta %~ insert "birth" [fromIntegral birth]
431 & branch_meta %~ insert "age" [fromIntegral age]
432 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
434 processMetrics :: Phylo -> PhyloExport -> PhyloExport
435 processMetrics phylo export = ngramsMetrics phylo
436 $ branchDating export
443 nk :: Int -> [[Int]] -> Int
445 $ map (\g -> if (elem n g)
450 tf :: Int -> [[Int]] -> Double
451 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
454 idf :: Int -> [[Int]] -> Double
455 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
458 findTfIdf :: [[Int]] -> [(Int,Double)]
459 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
462 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
463 findEmergences groups freq =
464 let ngrams = map _phylo_groupNgrams groups
465 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
466 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
467 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
468 in reverse $ sortOn snd
469 $ map (\(n,_) -> if (member n freq)
474 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
475 mostEmergentTfIdf nth freq foundations export =
476 over ( export_branches
479 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
480 tfidf = findTfIdf (map _phylo_groupNgrams groups)
481 emergences = findEmergences groups freq
482 selected = if (null emergences)
483 then map fst $ take nth tfidf
484 else [fst $ head' "mostEmergentTfIdf" emergences]
485 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
486 in b & branch_label .~ (ngramsToLabel foundations selected)) export
489 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
490 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
493 $ sortOn snd $ zip [0..] meta
496 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
497 mostInclusive nth foundations export =
498 over ( export_branches
501 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
502 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
503 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
504 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
505 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
506 in b & branch_label .~ lbl ) export
509 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
510 mostEmergentInclusive nth foundations export =
514 let lbl = ngramsToLabel foundations
516 $ map (\(_,(_,idx)) -> idx)
518 $ map (\groups -> sortOn (fst . snd) groups)
519 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
520 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
521 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
522 in g & phylo_groupLabel .~ lbl ) export
525 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
526 processLabels labels foundations freq export =
527 foldl (\export' label ->
529 GroupLabel tagger nth ->
531 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
532 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
533 BranchLabel tagger nth ->
535 MostInclusive -> mostInclusive nth foundations export'
536 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
537 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
545 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
546 toDynamics n parents g m =
547 let prd = g ^. phylo_groupPeriod
548 end = last' "dynamics" (sort $ map snd $ elems m)
549 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
552 else if ((fst prd) == (fst $ m ! n))
560 --------------------------------------
562 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
565 processDynamics :: [PhyloGroup] -> [PhyloGroup]
566 processDynamics groups =
568 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
569 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
570 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
572 --------------------------------------
573 mapNgrams :: Map Int (Date,Date)
574 mapNgrams = map (\dates ->
575 let dates' = sort dates
576 in (head' "dynamics" dates', last' "dynamics" dates'))
578 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
579 $ (g ^. phylo_groupNgrams))) [] groups
586 getGroupThr :: Double -> PhyloGroup -> Double
588 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
589 breaks = (g ^. phylo_groupMeta) ! "breaks"
590 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
592 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
593 toAncestor nbDocs diago proximity step candidates ego =
594 let curr = ego ^. phylo_groupAncestors
595 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
596 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
597 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
598 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
601 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
602 headsToAncestors nbDocs diago proximity step heads acc =
606 let ego = head' "headsToAncestors" heads
607 heads' = tail' "headsToAncestors" heads
608 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
611 toHorizon :: Phylo -> Phylo
613 let phyloAncestor = updatePhyloGroups
615 (fromList $ map (\g -> (getGroupId g, g))
617 $ tracePhyloAncestors newGroups) phylo
618 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
619 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
620 in updatePhyloGroups level reBranched phylo
622 -- | 1) for each periods
623 periods :: [PhyloPeriodId]
624 periods = getPeriodIds phylo
627 level = getLastLevel phylo
630 frame = getTimeFrame $ timeUnit $ getConfig phylo
631 -- | 2) find ancestors between groups without parents
632 mapGroups :: [[PhyloGroup]]
633 mapGroups = map (\prd ->
634 let groups = getGroupsFromLevelPeriods level [prd] phylo
635 childs = getPreviousChildIds level frame prd periods phylo
636 -- maybe add a better filter for non isolated ancestors
637 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
638 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
639 noHeads = groups \\ heads
640 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
641 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
642 proximity = (phyloProximity $ getConfig phylo)
643 step = case getSeaElevation phylo of
645 Adaptative _ -> undefined
646 -- in headsToAncestors nbDocs diago proximity heads groups []
647 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
648 $ headsToAncestors nbDocs diago proximity step heads []
650 -- | 3) process this task concurrently
651 newGroups :: [[PhyloGroup]]
652 newGroups = mapGroups `using` parList rdeepseq
653 --------------------------------------
655 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
656 getPreviousChildIds lvl frame curr prds phylo =
657 concat $ map ((map fst) . _phylo_groupPeriodChilds)
658 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
660 ---------------------
661 -- | phyloExport | --
662 ---------------------
664 toPhyloExport :: Phylo -> DotGraph DotId
665 toPhyloExport phylo = exportToDot phylo
666 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
667 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
668 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
669 $ processMetrics phylo export
671 export :: PhyloExport
672 export = PhyloExport groups branches
673 --------------------------------------
674 branches :: [PhyloBranch]
675 branches = map (\g ->
676 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
677 breaks = (g ^. phylo_groupMeta) ! "breaks"
678 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
679 in PhyloBranch (g ^. phylo_groupBranchId)
683 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
687 $ map (\gs -> head' "export" gs)
688 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
689 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
690 --------------------------------------
691 groups :: [PhyloGroup]
692 groups = traceExportGroups
694 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
695 $ tracePhyloInfo phylo
696 -- \$ toHorizon phylo
699 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
700 traceExportBranches branches = trace ("\n"
701 <> "-- | Export " <> show(length branches) <> " branches") branches
703 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
704 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
706 tracePhyloInfo :: Phylo -> Phylo
707 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
708 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
709 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
713 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
714 traceExportGroups groups = trace ("\n" <> "-- | Export "
715 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
716 <> show(length groups) <> " groups and "
717 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"