]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
[phylo] slight refactoring
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloExport.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloExport
3 Description : Exportation module of a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE TypeSynonymInstances #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloExport where
14
15 import Control.Lens hiding (Level)
16 import Control.Parallel.Strategies (parList, rdeepseq, using)
17 import Data.GraphViz hiding (DotGraph, Order)
18 import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
19 import Data.GraphViz.Types.Generalised (DotGraph)
20 import Data.GraphViz.Types.Monadic
21 import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
22 import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
23 import Data.Text.Lazy (fromStrict, pack, unpack)
24 import Data.Vector (Vector)
25 import Debug.Trace (trace)
26 import Gargantext.Core.Viz.Phylo
27 import Gargantext.Core.Viz.Phylo.PhyloTools
28 import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
29 import Gargantext.Prelude hiding (scale)
30 import Prelude (writeFile)
31 import Protolude (floor)
32 import System.FilePath
33 import qualified Data.GraphViz.Attributes.HTML as H
34 import qualified Data.Text as Text
35 import qualified Data.Text.Lazy as Lazy
36 import qualified Data.Vector as Vector
37
38 --------------------
39 -- | Dot export | --
40 --------------------
41
42 dotToFile :: FilePath -> DotGraph DotId -> IO ()
43 dotToFile filePath dotG = writeFile filePath $ dotToString dotG
44
45 dotToString :: DotGraph DotId -> [Char]
46 dotToString dotG = unpack (printDotGraph dotG)
47
48 dynamicToColor :: Int -> H.Attribute
49 dynamicToColor d
50 | d == 0 = H.BGColor (toColor LightCoral)
51 | d == 1 = H.BGColor (toColor Khaki)
52 | d == 2 = H.BGColor (toColor SkyBlue)
53 | otherwise = H.Color (toColor Black)
54
55 pickLabelColor :: [Int] -> H.Attribute
56 pickLabelColor lst
57 | elem 0 lst = dynamicToColor 0
58 | elem 1 lst = dynamicToColor 1
59 | elem 2 lst = dynamicToColor 2
60 | otherwise = dynamicToColor 3
61
62 toDotLabel :: Text.Text -> Label
63 toDotLabel lbl = StrLabel $ fromStrict lbl
64
65 toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
66 toAttr k v = customAttribute k v
67
68 metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
69 metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
70
71 groupIdToDotId :: PhyloGroupId -> DotId
72 groupIdToDotId (((d, d'), lvl), idx) =
73 (fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
74
75 branchIdToDotId :: PhyloBranchId -> DotId
76 branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
77
78 periodIdToDotId :: Period -> DotId
79 periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
80
81 groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
82 groupToTable fdt g =
83 H.Table H.HTable
84 { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
85 , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
86 , H.tableRows = [header]
87 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
88 <> ( map ngramsToRow $ splitEvery 4
89 $ reverse $ sortOn (snd . snd)
90 $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
91 $ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
92 where
93 --------------------------------------
94 ngramsToRow :: [(Ngrams, (Double, Double))] -> H.Row
95 ngramsToRow ns =
96 H.Cells $ map (\(n, (d, _)) ->
97 H.LabelCell [ H.Align H.HLeft
98 , dynamicToColor $ floor d] $ H.Text [H.Str $ fromStrict n]) ns
99 --------------------------------------
100 header :: H.Row
101 header =
102 H.Cells [ H.LabelCell [pickLabelColor $ floor <$> ((g ^. phylo_groupMeta) ! "dynamics")]
103 $ H.Text [H.Str $ ((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
104 <> fromStrict " ( "
105 <> (pack $ show (fst $ g ^. phylo_groupPeriod))
106 <> fromStrict " , "
107 <> (pack $ show (snd $ g ^. phylo_groupPeriod))
108 <> fromStrict " ) "
109 <> (pack $ show (getGroupId g))]]
110 --------------------------------------
111
112 branchToDotNode :: PhyloBranch -> Int -> Dot DotId
113 branchToDotNode b bId =
114 node (branchIdToDotId $ b ^. branch_id)
115 ( [ FillColor [toWColor CornSilk]
116 , FontName "Arial"
117 , FontSize 40
118 , Shape Egg
119 , Style [SItem Bold []]
120 , Label (toDotLabel $ b ^. branch_label) ]
121 <> (metaToAttr $ b ^. branch_meta)
122 <> [ toAttr "nodeType" "branch"
123 , toAttr "bId" (pack $ show bId)
124 , toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
125 , toAttr "branch_x" (fromStrict $ Text.pack $ show $ b ^. branch_x)
126 , toAttr "branch_y" (fromStrict $ Text.pack $ show $ b ^. branch_y)
127 , toAttr "label" (pack $ show $ b ^. branch_label)
128 ])
129
130 periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
131 periodToDotNode prd prd' =
132 node (periodIdToDotId prd) $
133 [ Shape BoxShape
134 , FontSize 50
135 , Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
136 <> [ toAttr "nodeType" "period"
137 , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
138 , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
139 , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
140 , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
141
142
143 groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
144 groupToDotNode fdt g bId =
145 node (groupIdToDotId $ getGroupId g)
146 ( [ FontName "Arial"
147 , Shape Square
148 , penWidth 4
149 , toLabel (groupToTable fdt g) ]
150 <> [ toAttr "nodeType" "group"
151 , toAttr "gid" (groupIdToDotId $ getGroupId g)
152 , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
153 , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
154 , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
155 , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
156 , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
157 , toAttr "bId" (pack $ show bId)
158 , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
159 , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
160 , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
161 , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
162 , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
163 , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
164 , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
165 , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
166 , toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
167 ])
168
169
170 toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
171 toDotEdge' source target thr w edgeType = edge source target
172 (case edgeType of
173 GroupToGroup -> undefined
174 GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
175 BranchToGroup -> undefined
176 BranchToBranch -> undefined
177 GroupToAncestor -> undefined
178 PeriodToPeriod -> undefined)
179
180
181 toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
182 toDotEdge source target lbl edgeType = edge source target
183 (case edgeType of
184 GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
185 GroupToGroupMemory -> undefined
186 BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
187 BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
188 GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl), toAttr "source" source, toAttr "target" target]
189 PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
190
191
192 mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
193 mergePointers groups =
194 let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
195 toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
196 in unionWith (\w w' -> max w w') toChilds toParents
197
198 mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
199 mergePointersMemory groups =
200 let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
201 toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
202 in concat [toChilds,toParents]
203
204 mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
205 mergeAncestors groups = concat
206 $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
207 $ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
208
209
210 toBid :: PhyloGroup -> [PhyloBranch] -> Int
211 toBid g bs =
212 let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
213 in fromJust $ elemIndex b' bs
214
215 exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
216 exportToDot phylo export =
217 trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
218 <> show(length $ export ^. export_groups) <> " groups "
219 <> show(length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
220 <> "##########################") $
221 digraph ((Str . fromStrict) $ phyloName $ getConfig phylo) $ do
222
223 {- 1) init the dot graph -}
224 graphAttrs ( [ Label (toDotLabel $ phyloName $ getConfig phylo)]
225 <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
226 , Ratio FillRatio
227 -- , Ratio AutoRatio
228 , Style [SItem Filled []],Color [toWColor White]]
229 {-- home made attributes -}
230 <> [ toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)
231 , toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
232 , toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ getDocsByDate phylo)
233 , toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)
234 , toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)
235 , toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)
236 , toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo)
237 , toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo
238 , toAttr (fromStrict "PhyloScale") $ pack $ show (getLevel phylo)
239 , toAttr (fromStrict "phyloQuality") $ pack $ show (phylo ^. phylo_quality)
240 , toAttr (fromStrict "phyloSeaRiseStart") $ pack $ show (getPhyloSeaRiseStart phylo)
241 , toAttr (fromStrict "phyloSeaRiseSteps") $ pack $ show (getPhyloSeaRiseSteps phylo)
242 -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
243 ])
244
245 {-
246 -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
247
248 -- 2) create a layer for the branches labels -}
249 subgraph (Str "Branches peaks") $ do
250
251 -- graphAttrs [Rank SameRank]
252 {-
253 -- 3) group the branches by hierarchy
254 -- mapM (\branches ->
255 -- subgraph (Str "Branches clade") $ do
256 -- graphAttrs [Rank SameRank]
257
258 -- -- 4) create a node for each branch
259 -- mapM branchToDotNode branches
260 -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
261 -}
262 mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
263
264 {-- 5) create a layer for each period -}
265 _ <- mapM (\period ->
266 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
267 graphAttrs [Rank SameRank]
268 periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriodStr)
269
270 {-- 6) create a node for each group -}
271 mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
272 ) $ phylo ^. phylo_periods
273
274 {-- 7) create the edges between a branch and its first groups -}
275 _ <- mapM (\(bId,groups) ->
276 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
277 )
278 $ toList
279 $ map (\groups -> head' "toDot"
280 $ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
281 $ sortOn (fst . _phylo_groupPeriod) groups)
282 $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
283
284 {- 8) create the edges between the groups -}
285 _ <- mapM (\((k,k'),v) ->
286 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
287 ) $ (toList . mergePointers) $ export ^. export_groups
288
289 {- 8-bis) create the edges between the groups -}
290 {- _ <- mapM (\((k,k'),v) ->
291 toDotEdge' (groupIdToDotId k) (groupIdToDotId k') (show (fst v)) (show (snd v)) GroupToGroupMemory
292 ) $ mergePointersMemory $ export ^. export_groups -}
293
294 _ <- mapM (\((k,k'),v) ->
295 toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
296 ) $ mergeAncestors $ export ^. export_groups
297
298 -- 10) create the edges between the periods
299 _ <- mapM (\(prd,prd') ->
300 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
301 ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
302
303 {- 8) create the edges between the branches
304 -- _ <- mapM (\(bId,bId') ->
305 -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
306 -- (Text.pack $ show(branchIdsToSimilarity bId bId'
307 -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
308 -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
309 -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
310 -}
311
312
313 graphAttrs [Rank SameRank]
314
315
316 ----------------
317 -- | Filter | --
318 ----------------
319
320 filterByBranchSize :: Double -> PhyloExport -> PhyloExport
321 filterByBranchSize thr export =
322 let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
323 in export & export_branches .~ (fst splited)
324 & export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
325
326
327 processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
328 processFilters filters qua export =
329 foldl (\export' f -> case f of
330 ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
331 then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
332 else filterByBranchSize thr export'
333 ) export filters
334
335 --------------
336 -- | Sort | --
337 --------------
338
339 branchToIso :: [PhyloBranch] -> [PhyloBranch]
340 branchToIso branches =
341 let steps = map sum
342 $ inits
343 $ map (\(b,x) -> b ^. branch_y + 0.05 - x)
344 $ zip branches
345 $ ([0] ++ (map (\(b,b') ->
346 let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
347 lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
348 in
349 if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
350 then (b' ^. branch_seaLevel) !! (lmin - 1)
351 else (b' ^. branch_seaLevel) !! (idx - 1)
352 ) $ listToSeq branches))
353 in map (\(x,b) -> b & branch_x .~ x)
354 $ zip steps branches
355
356 branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
357 branchToIso' start step branches =
358 let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
359 $ inits
360 $ ([0] ++ (map (\(b,b') ->
361 let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
362 in 1 - start - step * root) $ listToSeq branches))
363 in map (\(x,b) -> b & branch_x .~ x)
364 $ zip bx branches
365
366
367 sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
368 sortByHierarchy depth branches =
369 if (length branches == 1)
370 then branches
371 else concat
372 $ map (\branches' ->
373 let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
374 in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
375 ++ (sortByHierarchy (depth + 1) (snd partitions)))
376 $ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
377 $ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
378
379
380 sortByBirthDate :: Order -> PhyloExport -> PhyloExport
381 sortByBirthDate order export =
382 let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
383 branches' = case order of
384 Asc -> branches
385 Desc -> reverse branches
386 in export & export_branches .~ branches'
387
388 processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
389 processSort sort' elev export = case sort' of
390 ByBirthDate o -> sortByBirthDate o export
391 ByHierarchy _ -> case elev of
392 Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
393 Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
394 Evolving _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
395
396 -----------------
397 -- | Metrics | --
398 -----------------
399
400 -- | Return the conditional probability of i knowing j
401 conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
402 conditional m i j = (findWithDefault 0 (i,j) m)
403 / (m ! (j,j))
404
405
406 -- | Return the genericity score of a given ngram
407 genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
408 genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
409 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
410
411
412 -- | Return the specificity score of a given ngram
413 specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
414 specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
415 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
416
417
418 -- | Return the inclusion score of a given ngram
419 inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
420 inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
421 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
422
423
424 ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
425 ngramsMetrics phylo export =
426 over ( export_groups
427 . traverse )
428 (\g -> g & phylo_groupMeta %~ insert "genericity"
429 (map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
430 & phylo_groupMeta %~ insert "specificity"
431 (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
432 & phylo_groupMeta %~ insert "inclusion"
433 (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
434 & phylo_groupMeta %~ insert "frequence"
435 (map (\n -> getInMap n (getLastRootsFreq phylo)) $ g ^. phylo_groupNgrams)
436 ) export
437
438
439 branchDating :: PhyloExport -> PhyloExport
440 branchDating export =
441 over ( export_branches
442 . traverse )
443 (\b ->
444 let groups = sortOn fst
445 $ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
446 then acc ++ [g ^. phylo_groupPeriod]
447 else acc ) [] $ export ^. export_groups
448 periods = nub groups
449 birth = fst $ head' "birth" groups
450 age = (snd $ last' "age" groups) - birth
451 in b & branch_meta %~ insert "birth" [fromIntegral birth]
452 & branch_meta %~ insert "age" [fromIntegral age]
453 & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
454
455 processMetrics :: Phylo -> PhyloExport -> PhyloExport
456 processMetrics phylo export = ngramsMetrics phylo
457 $ branchDating export
458
459
460 -----------------
461 -- | Taggers | --
462 -----------------
463
464 nk :: Int -> [[Int]] -> Int
465 nk n groups = sum
466 $ map (\g -> if (elem n g)
467 then 1
468 else 0) groups
469
470
471 tf :: Int -> [[Int]] -> Double
472 tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
473
474
475 idf :: Int -> [[Int]] -> Double
476 idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
477
478
479 findTfIdf :: [[Int]] -> [(Int,Double)]
480 findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ nub $ concat groups
481
482
483 findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
484 findEmergences groups freq =
485 let ngrams = map _phylo_groupNgrams groups
486 dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
487 emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
488 $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
489 in reverse $ sortOn snd
490 $ map (\(n,_) -> if (member n freq)
491 then (n,freq ! n)
492 else (n,0)) emerging
493
494
495 mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
496 mostEmergentTfIdf nth freq foundations export =
497 over ( export_branches
498 . traverse )
499 (\b ->
500 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
501 tfidf = findTfIdf (map _phylo_groupNgrams groups)
502 emergences = findEmergences groups freq
503 selected = if (null emergences)
504 then map fst $ take nth tfidf
505 else [fst $ head' "mostEmergentTfIdf" emergences]
506 ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
507 in b & branch_label .~ (ngramsToLabel foundations selected)) export
508
509
510 getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
511 getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
512 $ take nth
513 $ reverse
514 $ sortOn snd $ zip [0..] meta
515
516
517 mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
518 mostInclusive nth foundations export =
519 over ( export_branches
520 . traverse )
521 (\b ->
522 let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
523 cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
524 ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
525 inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
526 lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
527 in b & branch_label .~ lbl ) export
528
529
530 mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
531 mostEmergentInclusive nth foundations export =
532 over ( export_groups
533 . traverse )
534 (\g ->
535 let lbl = ngramsToLabel foundations
536 $ take nth
537 $ map (\(_,(_,idx)) -> idx)
538 $ concat
539 $ map (\groups -> sortOn (fst . snd) groups)
540 $ groupBy ((==) `on` fst) $ reverse $ sortOn fst
541 $ zip ((g ^. phylo_groupMeta) ! "inclusion")
542 $ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
543 in g & phylo_groupLabel .~ lbl ) export
544
545
546 processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
547 processLabels labels foundations freq export =
548 foldl (\export' label ->
549 case label of
550 GroupLabel tagger nth ->
551 case tagger of
552 MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
553 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
554 BranchLabel tagger nth ->
555 case tagger of
556 MostInclusive -> mostInclusive nth foundations export'
557 MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
558 _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
559
560
561 ------------------
562 -- | Dynamics | --
563 ------------------
564
565 -- utiliser & creer une Map FdtId [PhyloGroup]
566 -- n = index of the current term
567 toDynamics :: FdtId -> [PhyloGroup] -> PhyloGroup -> Map FdtId (Date,Date) -> Double
568 toDynamics n elders g m =
569 let prd = g ^. phylo_groupPeriod
570 end = last' "dynamics" (sort $ map snd $ elems m)
571 in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
572 {- decrease -}
573 then 2
574 else if ((fst prd) == (fst $ m ! n))
575 {- emerging -}
576 then 0
577 else if isNew
578 {- emergence -}
579 then 1
580 else 3
581 where
582 --------------------------------------
583 isNew :: Bool
584 isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
585
586 type FdtId = Int
587 processDynamics :: [PhyloGroup] -> [PhyloGroup]
588 processDynamics groups =
589 map (\g ->
590 let elders = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
591 && ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
592 in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n elders g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
593 where
594 --------------------------------------
595 mapNgrams :: Map FdtId (Date,Date)
596 mapNgrams = map (\dates ->
597 let dates' = sort dates
598 in (head' "dynamics" dates', last' "dynamics" dates'))
599 $ fromListWith (++)
600 $ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
601 $ (g ^. phylo_groupNgrams))) [] groups
602
603
604 -----------------
605 -- | horizon | --
606 -----------------
607
608 getGroupThr :: Double -> PhyloGroup -> Double
609 getGroupThr step g =
610 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
611 breaks = (g ^. phylo_groupMeta) ! "breaks"
612 in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
613
614 toAncestor :: Double
615 -> Map Int Double
616 -> PhyloSimilarity
617 -> Double
618 -> [PhyloGroup]
619 -> PhyloGroup
620 -> PhyloGroup
621 toAncestor nbDocs diago similarity step candidates ego =
622 let curr = ego ^. phylo_groupAncestors
623 in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
624 $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
625 $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
626 $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
627
628
629 headsToAncestors :: Double
630 -> Map Int Double
631 -> PhyloSimilarity
632 -> Double
633 -> [PhyloGroup]
634 -> [PhyloGroup]
635 -> [PhyloGroup]
636 headsToAncestors nbDocs diago similarity step heads acc =
637 if (null heads)
638 then acc
639 else
640 let ego = head' "headsToAncestors" heads
641 heads' = tail' "headsToAncestors" heads
642 in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
643
644
645 toHorizon :: Phylo -> Phylo
646 toHorizon phylo =
647 let phyloAncestor = updatePhyloGroups
648 scale
649 (fromList $ map (\g -> (getGroupId g, g))
650 $ concat
651 $ tracePhyloAncestors newGroups) phylo
652 reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
653 $ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromScale scale phyloAncestor
654 in updatePhyloGroups scale reBranched phylo
655 where
656 -- | 1) for each periods
657 periods :: [Period]
658 periods = getPeriodIds phylo
659 -- --
660 scale :: Scale
661 scale = getLastLevel phylo
662 -- --
663 frame :: Int
664 frame = getTimeFrame $ timeUnit $ getConfig phylo
665 -- | 2) find ancestors between groups without parents
666 mapGroups :: [[PhyloGroup]]
667 mapGroups = map (\prd ->
668 let groups = getGroupsFromScalePeriods scale [prd] phylo
669 childs = getPreviousChildIds scale frame prd periods phylo
670 -- maybe add a better filter for non isolated ancestors
671 heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
672 $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
673 noHeads = groups \\ heads
674 nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd]
675 diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
676 sim = (similarity $ getConfig phylo)
677 step = case getSeaElevation phylo of
678 Constante _ s -> s
679 Adaptative _ -> 0
680 Evolving _ -> 0
681 -- in headsToAncestors nbDocs diago Similarity heads groups []
682 in map (toAncestor nbDocs diago sim step noHeads)
683 $ headsToAncestors nbDocs diago sim step heads []
684 ) periods
685 -- | 3) process this task concurrently
686 newGroups :: [[PhyloGroup]]
687 newGroups = mapGroups `using` parList rdeepseq
688 --------------------------------------
689
690 getPreviousChildIds :: Scale -> Int -> Period -> [Period] -> Phylo -> [PhyloGroupId]
691 getPreviousChildIds lvl frame curr prds phylo =
692 concat $ map ((map fst) . _phylo_groupPeriodChilds)
693 $ getGroupsFromScalePeriods lvl (getNextPeriods ToParents frame curr prds) phylo
694
695 ---------------------
696 -- | phyloExport | --
697 ---------------------
698
699 toPhyloExport :: Phylo -> DotGraph DotId
700 toPhyloExport phylo = exportToDot phylo
701 $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
702 $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
703 $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (getLastRootsFreq phylo)
704 $ processMetrics phylo export
705 where
706 export :: PhyloExport
707 export = PhyloExport groups branches
708 --------------------------------------
709 branches :: [PhyloBranch]
710 branches = map (\g ->
711 let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
712 breaks = (g ^. phylo_groupMeta) ! "breaks"
713 canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
714 in PhyloBranch { _branch_id = g ^. phylo_groupBranchId
715 , _branch_canonId = canonId
716 , _branch_seaLevel = seaLvl
717 , _branch_x = 0
718 , _branch_y = last' "export" $ take (round $ (last' "export" breaks) + 1) seaLvl
719 , _branch_w = 0
720 , _branch_t = 0
721 , _branch_label = ""
722 , _branch_meta = empty })
723 $ map (head' "export")
724 $ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
725 $ sortOn (^. phylo_groupBranchId) groups
726 --------------------------------------
727 groups :: [PhyloGroup]
728 groups = traceExportGroups
729 -- necessaire ?
730 $ processDynamics
731 $ getGroupsFromScale (phyloScale $ getConfig phylo)
732 $ tracePhyloInfo phylo
733
734
735 traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
736 traceExportBranches branches = trace ("\n"
737 <> "-- | Export " <> show(length branches) <> " branches") branches
738
739 tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
740 tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
741
742 tracePhyloInfo :: Phylo -> Phylo
743 tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
744 <> show(getLevel phylo) <> " applied to "
745 <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
746 ) phylo
747
748
749 traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
750 traceExportGroups groups = trace ("\n" <> "-- | Export "
751 <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
752 <> show(length groups) <> " groups and "
753 <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
754 ) groups