]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[FEAT] Annuaire pairing, full firstname
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
29 import Gargantext.Prelude
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
36
37 --------------------
38 -- | Dot export | --
39 --------------------
40
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
43
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
46
47 dynamicToColor :: Double -> H.Attribute
48 dynamicToColor d
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
53
54 pickLabelColor :: [Double] -> H.Attribute
55 pickLabelColor lst
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
60
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
63
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
66
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
69
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
72
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
75
76 periodIdToDotId :: PhyloPeriodId -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
78
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
89 where
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
95 header :: H.Row
96 header =
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
99 <> (fromStrict " ( ")
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
106
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
118 ])
119
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
129
130
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
148 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
149 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
150 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
151 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
152 ])
153
154
155 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge' source target thr w edgeType = edge source target
157 (case edgeType of
158 GroupToGroup -> undefined
159 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
160 BranchToGroup -> undefined
161 BranchToBranch -> undefined
162 GroupToAncestor -> undefined
163 PeriodToPeriod -> undefined)
164
165
166 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
167 toDotEdge source target lbl edgeType = edge source target
168 (case edgeType of
169 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
170 GroupToGroupMemory -> undefined
171 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
172 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
173 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
174 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
175
176
177 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
178 mergePointers groups =
179 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
180 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
181 in unionWith (\w w' -> max w w') toChilds toParents
182
183 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
184 mergePointersMemory groups =
185 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
186 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
187 in concat [toChilds,toParents]
188
189 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
190 mergeAncestors groups = concat
191 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
192 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
193
194
195 toBid :: PhyloGroup -> [PhyloBranch] -> Int
196 toBid g bs =
197 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
198 in fromJust $ elemIndex b' bs
199
200 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
201 exportToDot phylo export =
202 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
203 <> show(length $ export ^. export_groups) <> " groups "
204 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
205 <> "##########################") $
206 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
207
208 {- 1) init the dot graph -}
209 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
210 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
211 , Ratio FillRatio
212 -- , Ratio AutoRatio
213 , Style [SItem Filled []],Color [toWColor White]]
214 {-- home made attributes -}
215 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
216 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
217 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
218 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
219 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
220 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
221 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
222 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
223 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
224 ])
225
226 {-
227 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
228
229 -- 2) create a layer for the branches labels -}
230 subgraph (Str "Branches peaks") $ do
231
232 -- graphAttrs [Rank SameRank]
233 {-
234 -- 3) group the branches by hierarchy
235 -- mapM (\branches ->
236 -- subgraph (Str "Branches clade") $ do
237 -- graphAttrs [Rank SameRank]
238
239 -- -- 4) create a node for each branch
240 -- mapM branchToDotNode branches
241 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
242 -}
243 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
244
245 {-- 5) create a layer for each period -}
246 _ <- mapM (\period ->
247 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
248 graphAttrs [Rank SameRank]
249 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
250
251 {-- 6) create a node for each group -}
252 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
253 ) $ phylo ^. phylo_periods
254
255 {-- 7) create the edges between a branch and its first groups -}
256 _ <- mapM (\(bId,groups) ->
257 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
258 )
259 $ toList
260 $ map (\groups -> head' "toDot"
261 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
262 $ sortOn (fst . _phylo_groupPeriod) groups)
263 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
264
265 {- 8) create the edges between the groups -}
266 _ <- mapM (\((k,k'),v) ->
267 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
268 ) $ (toList . mergePointers) $ export ^. export_groups
269
270 {- 8-bis) create the edges between the groups -}
271 {- _ <- mapM (\((k,k'),v) ->
272 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
273 ) $ mergePointersMemory $ export ^. export_groups -}
274
275 _ <- mapM (\((k,k'),v) ->
276 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
277 ) $ mergeAncestors $ export ^. export_groups
278
279 -- 10) create the edges between the periods
280 _ <- mapM (\(prd,prd') ->
281 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
282 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
283
284 {- 8) create the edges between the branches
285 -- _ <- mapM (\(bId,bId') ->
286 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
287 -- (Text.pack $ show(branchIdsToProximity bId bId'
288 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
289 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
290 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
291 -}
292
293
294 graphAttrs [Rank SameRank]
295
296
297 ----------------
298 -- | Filter | --
299 ----------------
300
301 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
302 filterByBranchSize thr export =
303 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
304 in export & export_branches .~ (fst splited)
305 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
306
307
308 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
309 processFilters filters qua export =
310 foldl (\export' f -> case f of
311 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
312 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
313 else filterByBranchSize thr export'
314 ) export filters
315
316 --------------
317 -- | Sort | --
318 --------------
319
320 branchToIso :: [PhyloBranch] -> [PhyloBranch]
321 branchToIso branches =
322 let steps = map sum
323 $ inits
324 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
325 $ zip branches
326 $ ([0] ++ (map (\(b,b') ->
327 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
328 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
329 in
330 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
331 then (b' ^. branch_seaLevel) !! (lmin - 1)
332 else (b' ^. branch_seaLevel) !! (idx - 1)
333 ) $ listToSeq branches))
334 in map (\(x,b) -> b & branch_x .~ x)
335 $ zip steps branches
336
337 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
338 branchToIso' start step branches =
339 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
340 $ inits
341 $ ([0] ++ (map (\(b,b') ->
342 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
343 in 1 - start - step * root) $ listToSeq branches))
344 in map (\(x,b) -> b & branch_x .~ x)
345 $ zip bx branches
346
347
348 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
349 sortByHierarchy depth branches =
350 if (length branches == 1)
351 then branches
352 else concat
353 $ map (\branches' ->
354 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
355 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
356 ++ (sortByHierarchy (depth + 1) (snd partitions)))
357 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
358 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
359
360
361 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
362 sortByBirthDate order export =
363 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
364 branches' = case order of
365 Asc -> branches
366 Desc -> reverse branches
367 in export & export_branches .~ branches'
368
369 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
370 processSort sort' elev export = case sort' of
371 ByBirthDate o -> sortByBirthDate o export
372 ByHierarchy _ -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
373 $ sortByHierarchy 0 (export ^. export_branches))
374
375
376 -----------------
377 -- | Metrics | --
378 -----------------
379
380 -- | Return the conditional probability of i knowing j
381 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
382 conditional m i j = (findWithDefault 0 (i,j) m)
383 / (m ! (j,j))
384
385
386 -- | Return the genericity score of a given ngram
387 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
388 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
389 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
390
391
392 -- | Return the specificity score of a given ngram
393 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
394 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
395 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
396
397
398 -- | Return the inclusion score of a given ngram
399 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
400 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
401 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
402
403
404 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
405 ngramsMetrics phylo export =
406 over ( export_groups
407 . traverse )
408 (\g -> g & phylo_groupMeta %~ insert "genericity"
409 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
410 & phylo_groupMeta %~ insert "specificity"
411 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
412 & phylo_groupMeta %~ insert "inclusion"
413 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
414 & phylo_groupMeta %~ insert "frequence"
415 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
416 ) export
417
418
419 branchDating :: PhyloExport -> PhyloExport
420 branchDating export =
421 over ( export_branches
422 . traverse )
423 (\b ->
424 let groups = sortOn fst
425 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
426 then acc ++ [g ^. phylo_groupPeriod]
427 else acc ) [] $ export ^. export_groups
428 periods = nub groups
429 birth = fst $ head' "birth" groups
430 age = (snd $ last' "age" groups) - birth
431 in b & branch_meta %~ insert "birth" [fromIntegral birth]
432 & branch_meta %~ insert "age" [fromIntegral age]
433 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
434
435 processMetrics :: Phylo -> PhyloExport -> PhyloExport
436 processMetrics phylo export = ngramsMetrics phylo
437 $ branchDating export
438
439
440 -----------------
441 -- | Taggers | --
442 -----------------
443
444 nk :: Int -> [[Int]] -> Int
445 nk n groups = sum
446 $ map (\g -> if (elem n g)
447 then 1
448 else 0) groups
449
450
451 tf :: Int -> [[Int]] -> Double
452 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
453
454
455 idf :: Int -> [[Int]] -> Double
456 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
457
458
459 findTfIdf :: [[Int]] -> [(Int,Double)]
460 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
461
462
463 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
464 findEmergences groups freq =
465 let ngrams = map _phylo_groupNgrams groups
466 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
467 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
468 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
469 in reverse $ sortOn snd
470 $ map (\(n,_) -> if (member n freq)
471 then (n,freq ! n)
472 else (n,0)) emerging
473
474
475 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
476 mostEmergentTfIdf nth freq foundations export =
477 over ( export_branches
478 . traverse )
479 (\b ->
480 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
481 tfidf = findTfIdf (map _phylo_groupNgrams groups)
482 emergences = findEmergences groups freq
483 selected = if (null emergences)
484 then map fst $ take nth tfidf
485 else [fst $ head' "mostEmergentTfIdf" emergences]
486 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
487 in b & branch_label .~ (ngramsToLabel foundations selected)) export
488
489
490 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
491 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
492 $ take nth
493 $ reverse
494 $ sortOn snd $ zip [0..] meta
495
496
497 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
498 mostInclusive nth foundations export =
499 over ( export_branches
500 . traverse )
501 (\b ->
502 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
503 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
504 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
505 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
506 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
507 in b & branch_label .~ lbl ) export
508
509
510 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
511 mostEmergentInclusive nth foundations export =
512 over ( export_groups
513 . traverse )
514 (\g ->
515 let lbl = ngramsToLabel foundations
516 $ take nth
517 $ map (\(_,(_,idx)) -> idx)
518 $ concat
519 $ map (\groups -> sortOn (fst . snd) groups)
520 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
521 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
522 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
523 in g & phylo_groupLabel .~ lbl ) export
524
525
526 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
527 processLabels labels foundations freq export =
528 foldl (\export' label ->
529 case label of
530 GroupLabel tagger nth ->
531 case tagger of
532 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
533 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
534 BranchLabel tagger nth ->
535 case tagger of
536 MostInclusive -> mostInclusive nth foundations export'
537 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
538 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
539
540
541 ------------------
542 -- | Dynamics | --
543 ------------------
544
545
546 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
547 toDynamics n parents g m =
548 let prd = g ^. phylo_groupPeriod
549 end = last' "dynamics" (sort $ map snd $ elems m)
550 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
551 {- decrease -}
552 then 2
553 else if ((fst prd) == (fst $ m ! n))
554 {- emerging -}
555 then 0
556 else if isNew
557 {- emergence -}
558 then 1
559 else 3
560 where
561 --------------------------------------
562 isNew :: Bool
563 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
564
565
566 processDynamics :: [PhyloGroup] -> [PhyloGroup]
567 processDynamics groups =
568 map (\g ->
569 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
570 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
571 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
572 where
573 --------------------------------------
574 mapNgrams :: Map Int (Date,Date)
575 mapNgrams = map (\dates ->
576 let dates' = sort dates
577 in (head' "dynamics" dates', last' "dynamics" dates'))
578 $ fromListWith (++)
579 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
580 $ (g ^. phylo_groupNgrams))) [] groups
581
582
583 -----------------
584 -- | horizon | --
585 -----------------
586
587 getGroupThr :: Double -> PhyloGroup -> Double
588 getGroupThr step g =
589 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
590 breaks = (g ^. phylo_groupMeta) ! "breaks"
591 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
592
593 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
594 toAncestor nbDocs diago proximity step candidates ego =
595 let curr = ego ^. phylo_groupAncestors
596 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
597 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
598 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
599 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
600
601
602 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
603 headsToAncestors nbDocs diago proximity step heads acc =
604 if (null heads)
605 then acc
606 else
607 let ego = head' "headsToAncestors" heads
608 heads' = tail' "headsToAncestors" heads
609 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
610
611
612 toHorizon :: Phylo -> Phylo
613 toHorizon phylo =
614 let phyloAncestor = updatePhyloGroups
615 level
616 (fromList $ map (\g -> (getGroupId g, g))
617 $ concat
618 $ tracePhyloAncestors newGroups) phylo
619 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
620 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
621 in updatePhyloGroups level reBranched phylo
622 where
623 -- | 1) for each periods
624 periods :: [PhyloPeriodId]
625 periods = getPeriodIds phylo
626 -- --
627 level :: Level
628 level = getLastLevel phylo
629 -- --
630 frame :: Int
631 frame = getTimeFrame $ timeUnit $ getConfig phylo
632 -- | 2) find ancestors between groups without parents
633 mapGroups :: [[PhyloGroup]]
634 mapGroups = map (\prd ->
635 let groups = getGroupsFromLevelPeriods level [prd] phylo
636 childs = getPreviousChildIds level frame prd periods phylo
637 -- maybe add a better filter for non isolated ancestors
638 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
639 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
640 noHeads = groups \\ heads
641 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
642 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
643 proximity = (phyloProximity $ getConfig phylo)
644 step = case getSeaElevation phylo of
645 Constante _ s -> s
646 Adaptative _ -> undefined
647 -- in headsToAncestors nbDocs diago proximity heads groups []
648 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
649 $ headsToAncestors nbDocs diago proximity step heads []
650 ) periods
651 -- | 3) process this task concurrently
652 newGroups :: [[PhyloGroup]]
653 newGroups = mapGroups `using` parList rdeepseq
654 --------------------------------------
655
656 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
657 getPreviousChildIds lvl frame curr prds phylo =
658 concat $ map ((map fst) . _phylo_groupPeriodChilds)
659 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
660
661 ---------------------
662 -- | phyloExport | --
663 ---------------------
664
665 toPhyloExport :: Phylo -> DotGraph DotId
666 toPhyloExport phylo = exportToDot phylo
667 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
668 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
669 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
670 $ processMetrics phylo export
671 where
672 export :: PhyloExport
673 export = PhyloExport groups branches
674 --------------------------------------
675 branches :: [PhyloBranch]
676 branches = map (\g ->
677 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
678 breaks = (g ^. phylo_groupMeta) ! "breaks"
679 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
680 in PhyloBranch (g ^. phylo_groupBranchId)
681 canonId
682 seaLvl
683 0
684 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
685 0
686 0
687 "" empty)
688 $ map (\gs -> head' "export" gs)
689 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
690 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
691 --------------------------------------
692 groups :: [PhyloGroup]
693 groups = traceExportGroups
694 $ processDynamics
695 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
696 $ tracePhyloInfo phylo
697 -- \$ toHorizon phylo
698
699
700 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
701 traceExportBranches branches = trace ("\n"
702 <> "-- | Export " <> show(length branches) <> " branches") branches
703
704 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
705 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
706
707 tracePhyloInfo :: Phylo -> Phylo
708 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
709 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
710 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
711 ) phylo
712
713
714 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
715 traceExportGroups groups = trace ("\n" <> "-- | Export "
716 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
717 <> show(length groups) <> " groups and "
718 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
719 ) groups
720