]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
add Callon's density
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import System.FilePath
32 import qualified Data.GraphViz.Attributes.HTML as H
33 import qualified Data.Text as Text
34 import qualified Data.Text.Lazy as Lazy
35 import qualified Data.Vector as Vector
36
37 --------------------
38 -- | Dot export | --
39 --------------------
40
41 dotToFile :: FilePath -> DotGraph DotId -> IO ()
42 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
43
44 dotToString :: DotGraph DotId -> [Char]
45 dotToString dotG = unpack (printDotGraph dotG)
46
47 dynamicToColor :: Double -> H.Attribute
48 dynamicToColor d
49 | d == 0 = H.BGColor (toColor LightCoral)
50 | d == 1 = H.BGColor (toColor Khaki)
51 | d == 2 = H.BGColor (toColor SkyBlue)
52 | otherwise = H.Color (toColor Black)
53
54 pickLabelColor :: [Double] -> H.Attribute
55 pickLabelColor lst
56 | elem 0 lst = dynamicToColor 0
57 | elem 2 lst = dynamicToColor 2
58 | elem 1 lst = dynamicToColor 1
59 | otherwise = dynamicToColor 3
60
61 toDotLabel :: Text.Text -> Label
62 toDotLabel lbl = StrLabel $ fromStrict lbl
63
64 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
65 toAttr k v = customAttribute k v
66
67 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
68 metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
69
70 groupIdToDotId :: PhyloGroupId -> DotId
71 groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
72
73 branchIdToDotId :: PhyloBranchId -> DotId
74 branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
75
76 periodIdToDotId :: Period -> DotId
77 periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
78
79 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
80 groupToTable fdt g = H.Table H.HTable
81 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
82 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
83 , H.tableRows = [header]
84 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
85 <> ( map ngramsToRow $ splitEvery 4
86 $ reverse $ sortOn (snd . snd)
87 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
88 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
89 where
90 --------------------------------------
91 ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
92 ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
93 H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
94 --------------------------------------
95 header :: H.Row
96 header =
97 H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
98 $ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
99 <> (fromStrict " ( ")
100 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
101 <> (fromStrict " , ")
102 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
103 <> (fromStrict " ) ")
104 <> (pack $ show (getGroupId g)))]]
105 --------------------------------------
106
107 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
108 branchToDotNode b bId =
109 node (branchIdToDotId $ b ^. branch_id)
110 ([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
111 <> (metaToAttr $ b ^. branch_meta)
112 <> [ toAttr "nodeType" "branch"
113 , toAttr "bId" (pack $ show bId)
114 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
115 , toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
116 , toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
117 , toAttr "label" (pack $ show $ b ^. branch_label)
118 ])
119
120 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
121 periodToDotNode prd prd' =
122 node (periodIdToDotId prd)
123 ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
124 <> [ toAttr "nodeType" "period"
125 , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
126 , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
127 , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
128 , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
129
130
131 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
132 groupToDotNode fdt g bId =
133 node (groupIdToDotId $ getGroupId g)
134 ([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
135 <> [ toAttr "nodeType" "group"
136 , toAttr "gid" (groupIdToDotId $ getGroupId g)
137 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
138 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
139 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
140 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
141 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
142 , toAttr "bId" (pack $ show bId)
143 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
144 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
145 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
146 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
147 , toAttr "density" (pack $ show (g ^. phylo_groupDensity))
148 , toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
149 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
150 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
151 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
152 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
153 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
154 ])
155
156
157 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
158 toDotEdge' source target thr w edgeType = edge source target
159 (case edgeType of
160 GroupToGroup -> undefined
161 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
162 BranchToGroup -> undefined
163 BranchToBranch -> undefined
164 GroupToAncestor -> undefined
165 PeriodToPeriod -> undefined)
166
167
168 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
169 toDotEdge source target lbl edgeType = edge source target
170 (case edgeType of
171 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
172 GroupToGroupMemory -> undefined
173 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
174 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
175 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
176 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
177
178
179 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
180 mergePointers groups =
181 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
182 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
183 in unionWith (\w w' -> max w w') toChilds toParents
184
185 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
186 mergePointersMemory groups =
187 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
188 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
189 in concat [toChilds,toParents]
190
191 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
192 mergeAncestors groups = concat
193 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
194 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
195
196
197 toBid :: PhyloGroup -> [PhyloBranch] -> Int
198 toBid g bs =
199 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
200 in fromJust $ elemIndex b' bs
201
202 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
203 exportToDot phylo export =
204 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
205 <> show(length $ export ^. export_groups) <> " groups "
206 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
207 <> "##########################") $
208 digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
209
210 {- 1) init the dot graph -}
211 graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
212 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
213 , Ratio FillRatio
214 -- , Ratio AutoRatio
215 , Style [SItem Filled []],Color [toWColor White]]
216 {-- home made attributes -}
217 <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
218 ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
219 ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo))
220 ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
221 ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
222 ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
223 ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
224 ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
225 ,(toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo))
226 ,(toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality))
227 ,(toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo))
228 ,(toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo))
229 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
230 ])
231
232 {-
233 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
234
235 -- 2) create a layer for the branches labels -}
236 subgraph (Str "Branches peaks") $ do
237
238 -- graphAttrs [Rank SameRank]
239 {-
240 -- 3) group the branches by hierarchy
241 -- mapM (\branches ->
242 -- subgraph (Str "Branches clade") $ do
243 -- graphAttrs [Rank SameRank]
244
245 -- -- 4) create a node for each branch
246 -- mapM branchToDotNode branches
247 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
248 -}
249 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
250
251 {-- 5) create a layer for each period -}
252 _ <- mapM (\period ->
253 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
254 graphAttrs [Rank SameRank]
255 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
256
257 {-- 6) create a node for each group -}
258 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
259 ) $ phylo ^. phylo_periods
260
261 {-- 7) create the edges between a branch and its first groups -}
262 _ <- mapM (\(bId,groups) ->
263 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
264 )
265 $ toList
266 $ map (\groups -> head' "toDot"
267 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
268 $ sortOn (fst . _phylo_groupPeriod) groups)
269 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
270
271 {- 8) create the edges between the groups -}
272 _ <- mapM (\((k,k'),v) ->
273 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
274 ) $ (toList . mergePointers) $ export ^. export_groups
275
276 {- 8-bis) create the edges between the groups -}
277 {- _ <- mapM (\((k,k'),v) ->
278 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
279 ) $ mergePointersMemory $ export ^. export_groups -}
280
281 _ <- mapM (\((k,k'),v) ->
282 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
283 ) $ mergeAncestors $ export ^. export_groups
284
285 -- 10) create the edges between the periods
286 _ <- mapM (\(prd,prd') ->
287 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
288 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
289
290 {- 8) create the edges between the branches
291 -- _ <- mapM (\(bId,bId') ->
292 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
293 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
294 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
295 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
296 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
297 -}
298
299
300 graphAttrs [Rank SameRank]
301
302
303 ----------------
304 -- | Filter | --
305 ----------------
306
307 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
308 filterByBranchSize thr export =
309 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
310 in export & export_branches .~ (fst splited)
311 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
312
313
314 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
315 processFilters filters qua export =
316 foldl (\export' f -> case f of
317 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
318 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
319 else filterByBranchSize thr export'
320 ) export filters
321
322 --------------
323 -- | Sort | --
324 --------------
325
326 branchToIso :: [PhyloBranch] -> [PhyloBranch]
327 branchToIso branches =
328 let steps = map sum
329 $ inits
330 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
331 $ zip branches
332 $ ([0] ++ (map (\(b,b') ->
333 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
334 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
335 in
336 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
337 then (b' ^. branch_seaLevel) !! (lmin - 1)
338 else (b' ^. branch_seaLevel) !! (idx - 1)
339 ) $ listToSeq branches))
340 in map (\(x,b) -> b & branch_x .~ x)
341 $ zip steps branches
342
343 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
344 branchToIso' start step branches =
345 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
346 $ inits
347 $ ([0] ++ (map (\(b,b') ->
348 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
349 in 1 - start - step * root) $ listToSeq branches))
350 in map (\(x,b) -> b & branch_x .~ x)
351 $ zip bx branches
352
353
354 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
355 sortByHierarchy depth branches =
356 if (length branches == 1)
357 then branches
358 else concat
359 $ map (\branches' ->
360 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
361 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
362 ++ (sortByHierarchy (depth + 1) (snd partitions)))
363 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
364 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
365
366
367 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
368 sortByBirthDate order export =
369 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
370 branches' = case order of
371 Asc -> branches
372 Desc -> reverse branches
373 in export & export_branches .~ branches'
374
375 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
376 processSort sort' elev export = case sort' of
377 ByBirthDate o -> sortByBirthDate o export
378 ByHierarchy _ -> case elev of
379 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
380 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
381 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
382
383 -----------------
384 -- | Metrics | --
385 -----------------
386
387 -- | Return the conditional probability of i knowing j
388 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
389 conditional m i j = (findWithDefault 0 (i,j) m)
390 / (m ! (j,j))
391
392
393 -- | Return the genericity score of a given ngram
394 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
395 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
396 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
397
398
399 -- | Return the specificity score of a given ngram
400 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
401 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
402 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
403
404
405 -- | Return the inclusion score of a given ngram
406 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
407 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
408 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
409
410
411 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
412 ngramsMetrics phylo export =
413 over ( export_groups
414 . traverse )
415 (\g -> g & phylo_groupMeta %~ insert "genericity"
416 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
417 & phylo_groupMeta %~ insert "specificity"
418 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
419 & phylo_groupMeta %~ insert "inclusion"
420 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
421 & phylo_groupMeta %~ insert "frequence"
422 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
423 ) export
424
425
426 branchDating :: PhyloExport -> PhyloExport
427 branchDating export =
428 over ( export_branches
429 . traverse )
430 (\b ->
431 let groups = sortOn fst
432 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
433 then acc ++ [g ^. phylo_groupPeriod]
434 else acc ) [] $ export ^. export_groups
435 periods = nub groups
436 birth = fst $ head' "birth" groups
437 death = snd $ last' "death" groups
438 age = death - birth
439 in b & branch_meta %~ insert "birth" [fromIntegral birth]
440 & branch_meta %~ insert "death" [fromIntegral death]
441 & branch_meta %~ insert "age" [fromIntegral age]
442 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
443
444 processMetrics :: Phylo -> PhyloExport -> PhyloExport
445 processMetrics phylo export = ngramsMetrics phylo
446 $ branchDating export
447
448
449 -----------------
450 -- | Taggers | --
451 -----------------
452
453 nk :: Int -> [[Int]] -> Int
454 nk n groups = sum
455 $ map (\g -> if (elem n g)
456 then 1
457 else 0) groups
458
459
460 tf :: Int -> [[Int]] -> Double
461 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
462
463
464 idf :: Int -> [[Int]] -> Double
465 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
466
467
468 findTfIdf :: [[Int]] -> [(Int,Double)]
469 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
470
471
472 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
473 findEmergences groups freq =
474 let ngrams = map _phylo_groupNgrams groups
475 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
476 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
477 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
478 in reverse $ sortOn snd
479 $ map (\(n,_) -> if (member n freq)
480 then (n,freq ! n)
481 else (n,0)) emerging
482
483
484 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
485 mostEmergentTfIdf nth freq foundations export =
486 over ( export_branches
487 . traverse )
488 (\b ->
489 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
490 tfidf = findTfIdf (map _phylo_groupNgrams groups)
491 emergences = findEmergences groups freq
492 selected = if (null emergences)
493 then map fst $ take nth tfidf
494 else [fst $ head' "mostEmergentTfIdf" emergences]
495 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
496 in b & branch_label .~ (ngramsToLabel foundations selected)) export
497
498
499 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
500 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
501 $ take nth
502 $ reverse
503 $ sortOn snd $ zip [0..] meta
504
505
506 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
507 mostInclusive nth foundations export =
508 over ( export_branches
509 . traverse )
510 (\b ->
511 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
512 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
513 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
514 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
515 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
516 in b & branch_label .~ lbl ) export
517
518
519 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
520 mostEmergentInclusive nth foundations export =
521 over ( export_groups
522 . traverse )
523 (\g ->
524 let lbl = ngramsToLabel foundations
525 $ take nth
526 $ map (\(_,(_,idx)) -> idx)
527 $ concat
528 $ map (\groups -> sortOn (fst . snd) groups)
529 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
530 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
531 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
532 in g & phylo_groupLabel .~ lbl ) export
533
534
535 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
536 processLabels labels foundations freq export =
537 foldl (\export' label ->
538 case label of
539 GroupLabel tagger nth ->
540 case tagger of
541 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
542 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
543 BranchLabel tagger nth ->
544 case tagger of
545 MostInclusive -> mostInclusive nth foundations export'
546 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
547 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
548
549
550 ------------------
551 -- | Dynamics | --
552 ------------------
553
554 -- utiliser & creer une Map FdtId [PhyloGroup]
555 -- n = index of the current term
556 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
557 toDynamics n elders g m =
558 let prd = g ^. phylo_groupPeriod
559 end = last' "dynamics" (sort $ map snd $ elems m)
560 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
561 {- decrease -}
562 then 2
563 else if ((fst prd) == (fst $ m ! n))
564 {- emerging -}
565 then 0
566 else if isNew
567 {- emergence -}
568 then 1
569 else 3
570 where
571 --------------------------------------
572 isNew :: Bool
573 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
574
575 type FdtId = Int
576 processDynamics :: [PhyloGroup] -> [PhyloGroup]
577 processDynamics groups =
578 map (\g ->
579 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
580 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
581 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
582 where
583 --------------------------------------
584 mapNgrams :: Map FdtId (Date,Date)
585 mapNgrams = map (\dates ->
586 let dates' = sort dates
587 in (head' "dynamics" dates', last' "dynamics" dates'))
588 $ fromListWith (++)
589 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
590 $ (g ^. phylo_groupNgrams))) [] groups
591
592
593 -----------------
594 -- | horizon | --
595 -----------------
596
597 getGroupThr :: Double -> PhyloGroup -> Double
598 getGroupThr step g =
599 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
600 breaks = (g ^. phylo_groupMeta) ! "breaks"
601 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
602
603 toAncestor :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
604 toAncestor nbDocs diago similarity step candidates ego =
605 let curr = ego ^. phylo_groupAncestors
606 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
607 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
608 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
609 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
610
611
612 headsToAncestors :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
613 headsToAncestors nbDocs diago similarity step heads acc =
614 if (null heads)
615 then acc
616 else
617 let ego = head' "headsToAncestors" heads
618 heads' = tail' "headsToAncestors" heads
619 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
620
621
622 toHorizon :: Phylo -> Phylo
623 toHorizon phylo =
624 let phyloAncestor = updatePhyloGroups
625 scale
626 (fromList $ map (\g -> (getGroupId g, g))
627 $ concat
628 $ tracePhyloAncestors newGroups) phylo
629 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
630 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
631 in updatePhyloGroups scale reBranched phylo
632 where
633 -- | 1) for each periods
634 periods :: [Period]
635 periods = getPeriodIds phylo
636 -- --
637 scale :: Scale
638 scale = getLastLevel phylo
639 -- --
640 frame :: Int
641 frame = getTimeFrame $ timeUnit $ getConfig phylo
642 -- | 2) find ancestors between groups without parents
643 mapGroups :: [[PhyloGroup]]
644 mapGroups = map (\prd ->
645 let groups = getGroupsFromScalePeriods scale [prd] phylo
646 childs = getPreviousChildIds scale frame prd periods phylo
647 -- maybe add a better filter for non isolated ancestors
648 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
649 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
650 noHeads = groups \\ heads
651 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
652 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
653 sim = (similarity $ getConfig phylo)
654 step = case getSeaElevation phylo of
655 Constante _ s -> s
656 Adaptative _ -> 0
657 Evolving _ -> 0
658 -- in headsToAncestors nbDocs diago Similarity heads groups []
659 in map (\ego -> toAncestor nbDocs diago sim step noHeads ego)
660 $ headsToAncestors nbDocs diago sim step heads []
661 ) periods
662 -- | 3) process this task concurrently
663 newGroups :: [[PhyloGroup]]
664 newGroups = mapGroups `using` parList rdeepseq
665 --------------------------------------
666
667 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
668 getPreviousChildIds lvl frame curr prds phylo =
669 concat $ map ((map fst) . _phylo_groupPeriodChilds)
670 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
671
672 ---------------------
673 -- | phyloExport | --
674 ---------------------
675
676 toPhyloExport :: Phylo -> DotGraph DotId
677 toPhyloExport phylo = exportToDot phylo
678 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
679 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
680 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
681 $ processMetrics phylo export
682 where
683 export :: PhyloExport
684 export = PhyloExport groups branches
685 --------------------------------------
686 branches :: [PhyloBranch]
687 branches = map (\g ->
688 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
689 breaks = (g ^. phylo_groupMeta) ! "breaks"
690 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
691 in PhyloBranch (g ^. phylo_groupBranchId)
692 canonId
693 seaLvl
694 0
695 (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
696 0
697 0
698 "" empty)
699 $ map (\gs -> head' "export" gs)
700 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
701 $ sortOn (\g -> g ^. phylo_groupBranchId) groups
702 --------------------------------------
703 groups :: [PhyloGroup]
704 groups = traceExportGroups
705 -- necessaire ?
706 $ processDynamics
707 $ getGroupsFromScale (phyloScale $ getConfig phylo)
708 $ tracePhyloInfo phylo
709
710
711 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
712 traceExportBranches branches = trace ("\n"
713 <> "-- | Export " <> show(length branches) <> " branches") branches
714
715 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
716 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
717
718 tracePhyloInfo :: Phylo -> Phylo
719 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
720 <> show(getLevel phylo) <> " applied to "
721 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
722 ) phylo
723
724
725 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
726 traceExportGroups groups = trace ("\n" <> "-- | Export "
727 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
728 <> show(length groups) <> " groups and "
729 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
730 ) groups
731