2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# LANGUAGE TypeSynonymInstances #-}
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
47 dynamicToColor :: Double -> H.Attribute
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
54 pickLabelColor :: [Double] -> H.Attribute
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
76 periodIdToDotId :: Period -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
148 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
149 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
150 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
151 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
155 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge' source target thr w edgeType = edge source target
158 GroupToGroup -> undefined
159 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
160 BranchToGroup -> undefined
161 BranchToBranch -> undefined
162 GroupToAncestor -> undefined
163 PeriodToPeriod -> undefined)
166 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
167 toDotEdge source target lbl edgeType = edge source target
169 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
170 GroupToGroupMemory -> undefined
171 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
172 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
173 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
174 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
177 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
178 mergePointers groups =
179 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
180 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
181 in unionWith (\w w' -> max w w') toChilds toParents
183 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
184 mergePointersMemory groups =
185 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
186 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
187 in concat [toChilds,toParents]
189 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
190 mergeAncestors groups = concat
191 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
192 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
195 toBid :: PhyloGroup -> [PhyloBranch] -> Int
197 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
198 in fromJust $ elemIndex b' bs
200 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
201 exportToDot phylo export =
202 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
203 <> show(length $ export ^. export_groups) <> " groups "
204 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
205 <> "##########################") $
206 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
208 {- 1) init the dot graph -}
209 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
210 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
213 , Style [SItem Filled []],Color [toWColor White]]
214 {-- home made attributes -}
215 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
216 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
217 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
218 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
219 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
220 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
221 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
222 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
223 ,(toAttr (fromStrict "PhyloScale") $ pack $ show (_qua_granularity $ phyloQuality $ getConfig phylo))
224 ,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality))
225 ,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo))
226 ,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo))
227 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
231 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
233 -- 2) create a layer for the branches labels -}
234 subgraph (Str "Branches peaks") $ do
236 -- graphAttrs [Rank SameRank]
238 -- 3) group the branches by hierarchy
239 -- mapM (\branches ->
240 -- subgraph (Str "Branches clade") $ do
241 -- graphAttrs [Rank SameRank]
243 -- -- 4) create a node for each branch
244 -- mapM branchToDotNode branches
245 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
247 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
249 {-- 5) create a layer for each period -}
250 _ <- mapM (\period ->
251 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
252 graphAttrs [Rank SameRank]
253 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
255 {-- 6) create a node for each group -}
256 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
257 ) $ phylo ^. phylo_periods
259 {-- 7) create the edges between a branch and its first groups -}
260 _ <- mapM (\(bId,groups) ->
261 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
264 $ map (\groups -> head' "toDot"
265 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
266 $ sortOn (fst . _phylo_groupPeriod) groups)
267 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
269 {- 8) create the edges between the groups -}
270 _ <- mapM (\((k,k'),v) ->
271 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
272 ) $ (toList . mergePointers) $ export ^. export_groups
274 {- 8-bis) create the edges between the groups -}
275 {- _ <- mapM (\((k,k'),v) ->
276 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
277 ) $ mergePointersMemory $ export ^. export_groups -}
279 _ <- mapM (\((k,k'),v) ->
280 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
281 ) $ mergeAncestors $ export ^. export_groups
283 -- 10) create the edges between the periods
284 _ <- mapM (\(prd,prd') ->
285 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
286 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
288 {- 8) create the edges between the branches
289 -- _ <- mapM (\(bId,bId') ->
290 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
291 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
292 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
293 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
294 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
298 graphAttrs [Rank SameRank]
305 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
306 filterByBranchSize thr export =
307 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
308 in export & export_branches .~ (fst splited)
309 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
312 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
313 processFilters filters qua export =
314 foldl (\export' f -> case f of
315 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
316 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
317 else filterByBranchSize thr export'
324 branchToIso :: [PhyloBranch] -> [PhyloBranch]
325 branchToIso branches =
328 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
330 $ ([0] ++ (map (\(b,b') ->
331 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
332 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
334 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
335 then (b' ^. branch_seaLevel) !! (lmin - 1)
336 else (b' ^. branch_seaLevel) !! (idx - 1)
337 ) $ listToSeq branches))
338 in map (\(x,b) -> b & branch_x .~ x)
341 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
342 branchToIso' start step branches =
343 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
345 $ ([0] ++ (map (\(b,b') ->
346 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
347 in 1 - start - step * root) $ listToSeq branches))
348 in map (\(x,b) -> b & branch_x .~ x)
352 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
353 sortByHierarchy depth branches =
354 if (length branches == 1)
358 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
359 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
360 ++ (sortByHierarchy (depth + 1) (snd partitions)))
361 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
362 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
365 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
366 sortByBirthDate order export =
367 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
368 branches' = case order of
370 Desc -> reverse branches
371 in export & export_branches .~ branches'
373 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
374 processSort sort' elev export = case sort' of
375 ByBirthDate o -> sortByBirthDate o export
376 ByHierarchy _ -> case elev of
377 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
378 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
384 -- | Return the conditional probability of i knowing j
385 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
386 conditional m i j = (findWithDefault 0 (i,j) m)
390 -- | Return the genericity score of a given ngram
391 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
392 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
393 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
396 -- | Return the specificity score of a given ngram
397 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
398 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
399 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
402 -- | Return the inclusion score of a given ngram
403 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
404 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
405 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
408 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
409 ngramsMetrics phylo export =
412 (\g -> g & phylo_groupMeta %~ insert "genericity"
413 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
414 & phylo_groupMeta %~ insert "specificity"
415 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
416 & phylo_groupMeta %~ insert "inclusion"
417 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
418 & phylo_groupMeta %~ insert "frequence"
419 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
423 branchDating :: PhyloExport -> PhyloExport
424 branchDating export =
425 over ( export_branches
428 let groups = sortOn fst
429 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
430 then acc ++ [g ^. phylo_groupPeriod]
431 else acc ) [] $ export ^. export_groups
433 birth = fst $ head' "birth" groups
434 age = (snd $ last' "age" groups) - birth
435 in b & branch_meta %~ insert "birth" [fromIntegral birth]
436 & branch_meta %~ insert "age" [fromIntegral age]
437 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
439 processMetrics :: Phylo -> PhyloExport -> PhyloExport
440 processMetrics phylo export = ngramsMetrics phylo
441 $ branchDating export
448 nk :: Int -> [[Int]] -> Int
450 $ map (\g -> if (elem n g)
455 tf :: Int -> [[Int]] -> Double
456 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
459 idf :: Int -> [[Int]] -> Double
460 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
463 findTfIdf :: [[Int]] -> [(Int,Double)]
464 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
467 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
468 findEmergences groups freq =
469 let ngrams = map _phylo_groupNgrams groups
470 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
471 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
472 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
473 in reverse $ sortOn snd
474 $ map (\(n,_) -> if (member n freq)
479 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
480 mostEmergentTfIdf nth freq foundations export =
481 over ( export_branches
484 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
485 tfidf = findTfIdf (map _phylo_groupNgrams groups)
486 emergences = findEmergences groups freq
487 selected = if (null emergences)
488 then map fst $ take nth tfidf
489 else [fst $ head' "mostEmergentTfIdf" emergences]
490 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
491 in b & branch_label .~ (ngramsToLabel foundations selected)) export
494 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
495 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
498 $ sortOn snd $ zip [0..] meta
501 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
502 mostInclusive nth foundations export =
503 over ( export_branches
506 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
507 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
508 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
509 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
510 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
511 in b & branch_label .~ lbl ) export
514 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
515 mostEmergentInclusive nth foundations export =
519 let lbl = ngramsToLabel foundations
521 $ map (\(_,(_,idx)) -> idx)
523 $ map (\groups -> sortOn (fst . snd) groups)
524 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
525 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
526 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
527 in g & phylo_groupLabel .~ lbl ) export
530 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
531 processLabels labels foundations freq export =
532 foldl (\export' label ->
534 GroupLabel tagger nth ->
536 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
537 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
538 BranchLabel tagger nth ->
540 MostInclusive -> mostInclusive nth foundations export'
541 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
542 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
549 -- utiliser & creer une Map FdtId [PhyloGroup]
550 -- n = index of the current term
551 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
552 toDynamics n elders g m =
553 let prd = g ^. phylo_groupPeriod
554 end = last' "dynamics" (sort $ map snd $ elems m)
555 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
558 else if ((fst prd) == (fst $ m ! n))
566 --------------------------------------
568 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
571 processDynamics :: [PhyloGroup] -> [PhyloGroup]
572 processDynamics groups =
574 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
575 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
576 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
578 --------------------------------------
579 mapNgrams :: Map FdtId (Date,Date)
580 mapNgrams = map (\dates ->
581 let dates' = sort dates
582 in (head' "dynamics" dates', last' "dynamics" dates'))
584 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
585 $ (g ^. phylo_groupNgrams))) [] groups
592 getGroupThr :: Double -> PhyloGroup -> Double
594 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
595 breaks = (g ^. phylo_groupMeta) ! "breaks"
596 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
598 toAncestor :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
599 toAncestor nbDocs diago similarity step candidates ego =
600 let curr = ego ^. phylo_groupAncestors
601 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
602 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
603 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
604 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
607 headsToAncestors :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
608 headsToAncestors nbDocs diago similarity step heads acc =
612 let ego = head' "headsToAncestors" heads
613 heads' = tail' "headsToAncestors" heads
614 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
617 toHorizon :: Phylo -> Phylo
619 let phyloAncestor = updatePhyloGroups
621 (fromList $ map (\g -> (getGroupId g, g))
623 $ tracePhyloAncestors newGroups) phylo
624 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
625 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
626 in updatePhyloGroups scale reBranched phylo
628 -- | 1) for each periods
630 periods = getPeriodIds phylo
633 scale = getLastLevel phylo
636 frame = getTimeFrame $ timeUnit $ getConfig phylo
637 -- | 2) find ancestors between groups without parents
638 mapGroups :: [[PhyloGroup]]
639 mapGroups = map (\prd ->
640 let groups = getGroupsFromScalePeriods scale [prd] phylo
641 childs = getPreviousChildIds scale frame prd periods phylo
642 -- maybe add a better filter for non isolated ancestors
643 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
644 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
645 noHeads = groups \\ heads
646 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
647 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
648 sim = (similarity $ getConfig phylo)
649 step = case getSeaElevation phylo of
652 -- in headsToAncestors nbDocs diago Similarity heads groups []
653 in map (\ego -> toAncestor nbDocs diago sim step noHeads ego)
654 $ headsToAncestors nbDocs diago sim step heads []
656 -- | 3) process this task concurrently
657 newGroups :: [[PhyloGroup]]
658 newGroups = mapGroups `using` parList rdeepseq
659 --------------------------------------
661 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
662 getPreviousChildIds lvl frame curr prds phylo =
663 concat $ map ((map fst) . _phylo_groupPeriodChilds)
664 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
666 ---------------------
667 -- | phyloExport | --
668 ---------------------
670 toPhyloExport :: Phylo -> DotGraph DotId
671 toPhyloExport phylo = exportToDot phylo
672 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
673 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
674 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
675 $ processMetrics phylo export
677 export :: PhyloExport
678 export = PhyloExport groups branches
679 --------------------------------------
680 branches :: [PhyloBranch]
681 branches = map (\g ->
682 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
683 breaks = (g ^. phylo_groupMeta) ! "breaks"
684 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
685 in PhyloBranch (g ^. phylo_groupBranchId)
689 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
693 $ map (\gs -> head' "export" gs)
694 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
695 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
696 --------------------------------------
697 groups :: [PhyloGroup]
698 groups = traceExportGroups
701 $ getGroupsFromScale (phyloScale $ getConfig phylo)
702 $ tracePhyloInfo phylo
705 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
706 traceExportBranches branches = trace ("\n"
707 <> "-- | Export " <> show(length branches) <> " branches") branches
709 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
710 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
712 tracePhyloInfo :: Phylo -> Phylo
713 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
714 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
715 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
719 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
720 traceExportGroups groups = trace ("\n" <> "-- | Export "
721 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
722 <> show(length groups) <> " groups and "
723 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"