]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[text-api] first rewrite using Conduit
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
29 import Gargantext.Prelude
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
36
37 --------------------
38 -- | Dot export | --
39 --------------------
40
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
43
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
46
47 dynamicToColor :: Double -> H.Attribute
48 dynamicToColor d
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
53
54 pickLabelColor :: [Double] -> H.Attribute
55 pickLabelColor lst
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
60
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
63
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
66
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
69
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
72
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
75
76 periodIdToDotId :: PhyloPeriodId -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
78
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
89 where
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
95 header :: H.Row
96 header =
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
99 <> (fromStrict " ( ")
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
106
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
118 ])
119
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
129
130
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
148 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
149 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
150 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
151 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
152 ])
153
154
155 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
156 toDotEdge' source target thr w edgeType = edge source target
157 (case edgeType of
158 GroupToGroup -> undefined
159 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
160 BranchToGroup -> undefined
161 BranchToBranch -> undefined
162 GroupToAncestor -> undefined
163 PeriodToPeriod -> undefined)
164
165
166 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
167 toDotEdge source target lbl edgeType = edge source target
168 (case edgeType of
169 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
170 GroupToGroupMemory -> undefined
171 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
172 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
173 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
174 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
175
176
177 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
178 mergePointers groups =
179 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
180 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
181 in unionWith (\w w' -> max w w') toChilds toParents
182
183 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
184 mergePointersMemory groups =
185 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
186 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
187 in concat [toChilds,toParents]
188
189 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
190 mergeAncestors groups = concat
191 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
192 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
193
194
195 toBid :: PhyloGroup -> [PhyloBranch] -> Int
196 toBid g bs =
197 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
198 in fromJust $ elemIndex b' bs
199
200 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
201 exportToDot phylo export =
202 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
203 <> show(length $ export ^. export_groups) <> " groups "
204 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
205 <> "##########################") $
206 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
207
208 {- 1) init the dot graph -}
209 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
210 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
211 , Ratio FillRatio
212 , Style [SItem Filled []],Color [toWColor White]]
213 {-- home made attributes -}
214 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
215 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
216 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
217 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
218 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
219 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
220 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
221 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
222 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
223 ])
224
225 {-
226 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
227
228 -- 2) create a layer for the branches labels -}
229 subgraph (Str "Branches peaks") $ do
230
231 -- graphAttrs [Rank SameRank]
232 {-
233 -- 3) group the branches by hierarchy
234 -- mapM (\branches ->
235 -- subgraph (Str "Branches clade") $ do
236 -- graphAttrs [Rank SameRank]
237
238 -- -- 4) create a node for each branch
239 -- mapM branchToDotNode branches
240 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
241 -}
242 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
243
244 {-- 5) create a layer for each period -}
245 _ <- mapM (\period ->
246 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
247 graphAttrs [Rank SameRank]
248 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
249
250 {-- 6) create a node for each group -}
251 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
252 ) $ phylo ^. phylo_periods
253
254 {-- 7) create the edges between a branch and its first groups -}
255 _ <- mapM (\(bId,groups) ->
256 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
257 )
258 $ toList
259 $ map (\groups -> head' "toDot"
260 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
261 $ sortOn (fst . _phylo_groupPeriod) groups)
262 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
263
264 {- 8) create the edges between the groups -}
265 _ <- mapM (\((k,k'),v) ->
266 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
267 ) $ (toList . mergePointers) $ export ^. export_groups
268
269 {- 8-bis) create the edges between the groups -}
270 {- _ <- mapM (\((k,k'),v) ->
271 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
272 ) $ mergePointersMemory $ export ^. export_groups -}
273
274 _ <- mapM (\((k,k'),v) ->
275 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
276 ) $ mergeAncestors $ export ^. export_groups
277
278 -- 10) create the edges between the periods
279 _ <- mapM (\(prd,prd') ->
280 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
281 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
282
283 {- 8) create the edges between the branches
284 -- _ <- mapM (\(bId,bId') ->
285 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
286 -- (Text.pack $ show(branchIdsToProximity bId bId'
287 -- (getThresholdInit $ phyloProximity $ getConfig phylo)
288 -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
289 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
290 -}
291
292
293 graphAttrs [Rank SameRank]
294
295
296 ----------------
297 -- | Filter | --
298 ----------------
299
300 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
301 filterByBranchSize thr export =
302 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
303 in export & export_branches .~ (fst splited)
304 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
305
306
307 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
308 processFilters filters qua export =
309 foldl (\export' f -> case f of
310 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
311 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
312 else filterByBranchSize thr export'
313 ) export filters
314
315 --------------
316 -- | Sort | --
317 --------------
318
319 branchToIso :: [PhyloBranch] -> [PhyloBranch]
320 branchToIso branches =
321 let steps = map sum
322 $ inits
323 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
324 $ zip branches
325 $ ([0] ++ (map (\(b,b') ->
326 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
327 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
328 in
329 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
330 then (b' ^. branch_seaLevel) !! (lmin - 1)
331 else (b' ^. branch_seaLevel) !! (idx - 1)
332 ) $ listToSeq branches))
333 in map (\(x,b) -> b & branch_x .~ x)
334 $ zip steps branches
335
336 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
337 branchToIso' start step branches =
338 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
339 $ inits
340 $ ([0] ++ (map (\(b,b') ->
341 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
342 in 1 - start - step * root) $ listToSeq branches))
343 in map (\(x,b) -> b & branch_x .~ x)
344 $ zip bx branches
345
346
347 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
348 sortByHierarchy depth branches =
349 if (length branches == 1)
350 then branches
351 else concat
352 $ map (\branches' ->
353 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
354 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
355 ++ (sortByHierarchy (depth + 1) (snd partitions)))
356 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
357 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
358
359
360 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
361 sortByBirthDate order export =
362 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
363 branches' = case order of
364 Asc -> branches
365 Desc -> reverse branches
366 in export & export_branches .~ branches'
367
368 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
369 processSort sort' elev export = case sort' of
370 ByBirthDate o -> sortByBirthDate o export
371 ByHierarchy _ -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
372 $ sortByHierarchy 0 (export ^. export_branches))
373
374
375 -----------------
376 -- | Metrics | --
377 -----------------
378
379 -- | Return the conditional probability of i knowing j
380 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
381 conditional m i j = (findWithDefault 0 (i,j) m)
382 / (m ! (j,j))
383
384
385 -- | Return the genericity score of a given ngram
386 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
387 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
388 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
389
390
391 -- | Return the specificity score of a given ngram
392 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
393 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
394 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
395
396
397 -- | Return the inclusion score of a given ngram
398 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
399 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
400 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
401
402
403 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
404 ngramsMetrics phylo export =
405 over ( export_groups
406 . traverse )
407 (\g -> g & phylo_groupMeta %~ insert "genericity"
408 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
409 & phylo_groupMeta %~ insert "specificity"
410 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
411 & phylo_groupMeta %~ insert "inclusion"
412 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
413 & phylo_groupMeta %~ insert "frequence"
414 (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
415 ) export
416
417
418 branchDating :: PhyloExport -> PhyloExport
419 branchDating export =
420 over ( export_branches
421 . traverse )
422 (\b ->
423 let groups = sortOn fst
424 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
425 then acc ++ [g ^. phylo_groupPeriod]
426 else acc ) [] $ export ^. export_groups
427 periods = nub groups
428 birth = fst $ head' "birth" groups
429 age = (snd $ last' "age" groups) - birth
430 in b & branch_meta %~ insert "birth" [fromIntegral birth]
431 & branch_meta %~ insert "age" [fromIntegral age]
432 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
433
434 processMetrics :: Phylo -> PhyloExport -> PhyloExport
435 processMetrics phylo export = ngramsMetrics phylo
436 $ branchDating export
437
438
439 -----------------
440 -- | Taggers | --
441 -----------------
442
443 nk :: Int -> [[Int]] -> Int
444 nk n groups = sum
445 $ map (\g -> if (elem n g)
446 then 1
447 else 0) groups
448
449
450 tf :: Int -> [[Int]] -> Double
451 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
452
453
454 idf :: Int -> [[Int]] -> Double
455 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
456
457
458 findTfIdf :: [[Int]] -> [(Int,Double)]
459 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
460
461
462 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
463 findEmergences groups freq =
464 let ngrams = map _phylo_groupNgrams groups
465 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
466 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
467 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
468 in reverse $ sortOn snd
469 $ map (\(n,_) -> if (member n freq)
470 then (n,freq ! n)
471 else (n,0)) emerging
472
473
474 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
475 mostEmergentTfIdf nth freq foundations export =
476 over ( export_branches
477 . traverse )
478 (\b ->
479 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
480 tfidf = findTfIdf (map _phylo_groupNgrams groups)
481 emergences = findEmergences groups freq
482 selected = if (null emergences)
483 then map fst $ take nth tfidf
484 else [fst $ head' "mostEmergentTfIdf" emergences]
485 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
486 in b & branch_label .~ (ngramsToLabel foundations selected)) export
487
488
489 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
490 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
491 $ take nth
492 $ reverse
493 $ sortOn snd $ zip [0..] meta
494
495
496 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
497 mostInclusive nth foundations export =
498 over ( export_branches
499 . traverse )
500 (\b ->
501 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
502 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
503 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
504 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
505 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
506 in b & branch_label .~ lbl ) export
507
508
509 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
510 mostEmergentInclusive nth foundations export =
511 over ( export_groups
512 . traverse )
513 (\g ->
514 let lbl = ngramsToLabel foundations
515 $ take nth
516 $ map (\(_,(_,idx)) -> idx)
517 $ concat
518 $ map (\groups -> sortOn (fst . snd) groups)
519 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
520 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
521 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
522 in g & phylo_groupLabel .~ lbl ) export
523
524
525 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
526 processLabels labels foundations freq export =
527 foldl (\export' label ->
528 case label of
529 GroupLabel tagger nth ->
530 case tagger of
531 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
532 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
533 BranchLabel tagger nth ->
534 case tagger of
535 MostInclusive -> mostInclusive nth foundations export'
536 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
537 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
538
539
540 ------------------
541 -- | Dynamics | --
542 ------------------
543
544
545 toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
546 toDynamics n parents g m =
547 let prd = g ^. phylo_groupPeriod
548 end = last' "dynamics" (sort $ map snd $ elems m)
549 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
550 {- decrease -}
551 then 2
552 else if ((fst prd) == (fst $ m ! n))
553 {- emerging -}
554 then 0
555 else if isNew
556 {- emergence -}
557 then 1
558 else 3
559 where
560 --------------------------------------
561 isNew :: Bool
562 isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
563
564
565 processDynamics :: [PhyloGroup] -> [PhyloGroup]
566 processDynamics groups =
567 map (\g ->
568 let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
569 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
570 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
571 where
572 --------------------------------------
573 mapNgrams :: Map Int (Date,Date)
574 mapNgrams = map (\dates ->
575 let dates' = sort dates
576 in (head' "dynamics" dates', last' "dynamics" dates'))
577 $ fromListWith (++)
578 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
579 $ (g ^. phylo_groupNgrams))) [] groups
580
581
582 -----------------
583 -- | horizon | --
584 -----------------
585
586 getGroupThr :: Double -> PhyloGroup -> Double
587 getGroupThr step g =
588 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
589 breaks = (g ^. phylo_groupMeta) ! "breaks"
590 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
591
592 toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
593 toAncestor nbDocs diago proximity step candidates ego =
594 let curr = ego ^. phylo_groupAncestors
595 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
596 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
597 $ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
598 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
599
600
601 headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
602 headsToAncestors nbDocs diago proximity step heads acc =
603 if (null heads)
604 then acc
605 else
606 let ego = head' "headsToAncestors" heads
607 heads' = tail' "headsToAncestors" heads
608 in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
609
610
611 toHorizon :: Phylo -> Phylo
612 toHorizon phylo =
613 let phyloAncestor = updatePhyloGroups
614 level
615 (fromList $ map (\g -> (getGroupId g, g))
616 $ concat
617 $ tracePhyloAncestors newGroups) phylo
618 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
619 $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
620 in updatePhyloGroups level reBranched phylo
621 where
622 -- | 1) for each periods
623 periods :: [PhyloPeriodId]
624 periods = getPeriodIds phylo
625 -- --
626 level :: Level
627 level = getLastLevel phylo
628 -- --
629 frame :: Int
630 frame = getTimeFrame $ timeUnit $ getConfig phylo
631 -- | 2) find ancestors between groups without parents
632 mapGroups :: [[PhyloGroup]]
633 mapGroups = map (\prd ->
634 let groups = getGroupsFromLevelPeriods level [prd] phylo
635 childs = getPreviousChildIds level frame prd periods phylo
636 -- maybe add a better filter for non isolated ancestors
637 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
638 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
639 noHeads = groups \\ heads
640 nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
641 diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
642 proximity = (phyloProximity $ getConfig phylo)
643 step = case getSeaElevation phylo of
644 Constante _ s -> s
645 Adaptative _ -> undefined
646 -- in headsToAncestors nbDocs diago proximity heads groups []
647 in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
648 $ headsToAncestors nbDocs diago proximity step heads []
649 ) periods
650 -- | 3) process this task concurrently
651 newGroups :: [[PhyloGroup]]
652 newGroups = mapGroups `using` parList rdeepseq
653 --------------------------------------
654
655 getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
656 getPreviousChildIds lvl frame curr prds phylo =
657 concat $ map ((map fst) . _phylo_groupPeriodChilds)
658 $ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
659
660 ---------------------
661 -- | phyloExport | --
662 ---------------------
663
664 toPhyloExport :: Phylo -> DotGraph DotId
665 toPhyloExport phylo = exportToDot phylo
666 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
667 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
668 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
669 $ processMetrics phylo export
670 where
671 export :: PhyloExport
672 export = PhyloExport groups branches
673 --------------------------------------
674 branches :: [PhyloBranch]
675 branches = map (\g ->
676 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
677 breaks = (g ^. phylo_groupMeta) ! "breaks"
678 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
679 in PhyloBranch (g ^. phylo_groupBranchId)
680 canonId
681 seaLvl
682 0
683 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
684 0
685 0
686 "" empty)
687 $ map (\gs -> head' "export" gs)
688 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
689 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
690 --------------------------------------
691 groups :: [PhyloGroup]
692 groups = traceExportGroups
693 $ processDynamics
694 $ getGroupsFromLevel (phyloLevel $ getConfig phylo)
695 $ tracePhyloInfo phylo
696 -- \$ toHorizon phylo
697
698
699 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
700 traceExportBranches branches = trace ("\n"
701 <> "-- | Export " <> show(length branches) <> " branches") branches
702
703 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
704 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
705
706 tracePhyloInfo :: Phylo -> Phylo
707 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
708 <> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
709 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
710 ) phylo
711
712
713 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
714 traceExportGroups groups = trace ("\n" <> "-- | Export "
715 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
716 <> show(length groups) <> " groups and "
717 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
718 ) groups
719