]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Example.hs
Add the clustering up to level 2 and more
[gargantext.git] / src / Gargantext / Viz / Phylo / Example.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Example
3 Description : Phylomemy example based on history of Cleopatre.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -- | Cesar et Cleôpatre
11 -- Exemple de phylomemie
12 -- French without accents
13
14
15 TODO:
16 - split the functions : RAW -> Document -> Ngrams
17
18 -- reverse history: antechronologique
19 -- metrics support
20
21
22 -}
23
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE FlexibleContexts #-}
26 {-# LANGUAGE OverloadedStrings #-}
27
28 module Gargantext.Viz.Phylo.Example where
29
30 import Control.Lens hiding (makeLenses, both, Level)
31
32 import Data.Bool (Bool, not)
33 import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete, init)
34 import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith)
35 import Data.Semigroup (Semigroup)
36 import Data.Set (Set)
37 import Data.Text (Text, unwords, toLower, words)
38 import Data.Tuple (fst, snd)
39 import Data.Tuple.Extra
40 import Data.Vector (Vector, fromList, elemIndex)
41
42 import Gargantext.Prelude hiding (head)
43 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
44 import Gargantext.Text.Terms.Mono (monoTexts)
45 import Gargantext.Viz.Phylo
46 import Gargantext.Viz.Phylo.Tools
47
48 import qualified Data.Bool as Bool
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Maybe as Maybe
52 import qualified Data.Set as Set
53 import qualified Data.Tuple as Tuple
54 import qualified Data.Vector as Vector
55
56
57 ------------------------------------------------------------------------
58 -- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
59
60
61 ------------------------------------------------------------------------
62 -- | STEP 13 | -- Cluster the Fis
63
64
65 -- | To do : ajouter de nouveaux clusters / proxi
66 -- gérer les cooc à level 2 et +, idem pour les quality
67 -- réfléchir aux formats de sortie
68
69
70 -- | To apply a Clustering method to a PhyloGraph
71 graphToClusters :: (Clustering,[Double]) -> PhyloGraph -> [[PhyloGroup]]
72 graphToClusters (clust,param) (nodes,edges) = case clust of
73 Louvain -> undefined
74 RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
75
76
77 -- | To transform a Phylo into Clusters of PhyloGroups at a given level
78 phyloToClusters :: Level -> (Proximity,[Double]) -> (Clustering,[Double]) -> Phylo -> Map (Date,Date) [[PhyloGroup]]
79 phyloToClusters lvl (prox,param) (clus,param') p = Map.fromList
80 $ zip (getPhyloPeriods p)
81 (map (\prd -> let graph = groupsToGraph (prox,param) (getGroupsWithFilters (getLevelValue lvl) prd p) p
82 in if null (fst graph)
83 then []
84 else graphToClusters (clus,param') graph)
85 (getPhyloPeriods p))
86
87
88 -- | To transform a Cluster into a Phylogroup
89 clusterToGroup :: PhyloPeriodId -> Int -> Int -> Text -> [PhyloGroup] -> PhyloGroup
90 clusterToGroup prd lvl idx lbl groups = PhyloGroup ((prd, lvl), idx)
91 lbl
92 ((sort . nub . concat) $ map getGroupNgrams groups)
93 empty
94 empty
95 [] [] []
96 (map (\g -> (getGroupId g, 1)) groups)
97
98 -- | To transform a list of Clusters into a new Phylolevel
99 clustersToPhyloLevel :: Level -> Map (Date,Date) [[PhyloGroup]] -> Phylo -> Phylo
100 clustersToPhyloLevel lvl m p = over (phylo_periods . traverse)
101 (\period ->
102 let periodId = _phylo_periodId period
103 clusters = zip [1..] (m ! periodId)
104 in over (phylo_periodLevels)
105 (\levels ->
106 let groups = map (\cluster -> clusterToGroup periodId (getLevelValue lvl) (fst cluster) "" (snd cluster)) clusters
107 in levels ++ [PhyloLevel (periodId, (getLevelValue lvl)) groups]
108 ) period) p
109
110
111 phyloWithGroups2 = clustersToPhyloLevel (initLevel 2 Level_N)
112 (phyloToClusters (initLevel 1 Level_1) (WeightedLogJaccard,[0]) (RelatedComponents, []) phyloWithBranches_1)
113 phyloWithBranches_1
114
115 ------------------------------------------------------------------------
116 -- | STEP 12 | -- Find the Branches
117
118
119 -- | To apply the related components method to a PhyloGraph
120 -- curr = the current PhyloGroup
121 -- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
122 -- next = the next PhyloGroups to be added in the cluster
123 -- memo = the memory of the allready created clusters
124 relatedComp :: Int -> PhyloGroup -> PhyloGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
125 relatedComp idx curr (nodes,edges) next memo
126 | null nodes' && null next' = memo'
127 | (not . null) next' = relatedComp idx (head next') (nodes',edges) (tail next') memo'
128 | otherwise = relatedComp (idx + 1) (head nodes') (tail nodes',edges) [] memo'
129 where
130 --------------------------------------
131 memo' :: [[PhyloGroup]]
132 memo'
133 | null memo = [[curr]]
134 | idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
135 | otherwise = memo ++ [[curr]]
136 --------------------------------------
137 next' :: [PhyloGroup]
138 next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
139 --------------------------------------
140 nodes' :: [PhyloGroup]
141 nodes' = filter (\x -> not $ elem x next') nodes
142 --------------------------------------
143
144
145 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
146 graphToBranches :: Int -> PhyloGraph -> Phylo -> [PhyloBranch]
147 graphToBranches lvl (nodes,edges) p = map (\(idx,c) -> PhyloBranch (lvl,idx) "" (map getGroupId c)) $ zip [0..] clusters
148 where
149 --------------------------------------
150 clusters :: [[PhyloGroup]]
151 clusters = relatedComp 0 (head nodes) (tail nodes,edges) [] []
152 --------------------------------------
153
154
155 -- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
156 groupsToGraph :: (Proximity,[Double]) -> [PhyloGroup] -> Phylo -> PhyloGraph
157 groupsToGraph (prox,param) groups p = (groups,edges)
158 where
159 edges :: PhyloEdges
160 edges = case prox of
161 FromPairs -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
162 ++
163 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
164 WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard
165 (param !! 0) (getGroupCooc x)
166 (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
167 _ -> undefined
168
169
170 -- | To set all the PhyloBranches for a given Level in a Phylo
171 setPhyloBranches :: Level -> Phylo -> Phylo
172 setPhyloBranches lvl p = alterPhyloBranches
173 (\branches -> branches
174 ++
175 (graphToBranches (getLevelValue lvl) (groupsToGraph (FromPairs,[]) groups p) p)
176 ) p
177 where
178 --------------------------------------
179 groups :: [PhyloGroup]
180 groups = getGroupsWithLevel (getLevelValue lvl) p
181 --------------------------------------
182
183
184 phyloWithBranches_1 = setPhyloBranches (initLevel 1 Level_1) phyloWithPair_1_Childs
185
186
187 ------------------------------------------------------------------------
188 -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
189
190
191 -- | To process the weightedLogJaccard between two PhyloGroups fields
192 weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
193 weightedLogJaccard s f1 f2
194 | null wUnion = 0
195 | wUnion == wInter = 1
196 | s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
197 | s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
198 | otherwise = (sumLog wInter)/(sumLog wUnion)
199 where
200 --------------------------------------
201 wInter :: [Double]
202 wInter = elems $ intersectionWith (+) f1 f2
203 --------------------------------------
204 wUnion :: [Double]
205 wUnion = elems $ unionWith (+) f1 f2
206 --------------------------------------
207 sumInvLog :: [Double] -> Double
208 sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
209 --------------------------------------
210 sumLog :: [Double] -> Double
211 sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
212 --------------------------------------
213
214
215 -- | To apply the corresponding proximity function based on a given Proximity
216 getProximity :: (Proximity,[Double]) -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
217 getProximity (prox,param) g1 g2 = case prox of
218 WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (param !! 0) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
219 _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
220
221
222 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
223 getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
224 getNextPeriods to id l = case to of
225 Childs -> unNested id ((tail . snd) next)
226 Parents -> unNested id ((reverse . fst) next)
227 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
228 where
229 --------------------------------------
230 next :: ([PhyloPeriodId], [PhyloPeriodId])
231 next = splitAt idx l
232 --------------------------------------
233 idx :: Int
234 idx = case (List.elemIndex id l) of
235 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
236 Just i -> i
237 --------------------------------------
238 -- | To have an non-overlapping next period
239 unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
240 unNested x l
241 | null l = []
242 | nested (fst $ head l) x = unNested x (tail l)
243 | nested (snd $ head l) x = unNested x (tail l)
244 | otherwise = l
245 --------------------------------------
246 nested :: Date -> PhyloPeriodId -> Bool
247 nested d prd = d >= fst prd && d <= snd prd
248 --------------------------------------
249
250
251 -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
252 findBestCandidates :: PairTo -> Int -> Int -> Double -> (Proximity,[Double]) -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
253 findBestCandidates to depth max thr (prox,param) group p
254 | depth > max || null next = []
255 | (not . null) best = take 2 best
256 | otherwise = findBestCandidates to (depth + 1) max thr (prox,param) group p
257 where
258 --------------------------------------
259 next :: [PhyloPeriodId]
260 next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
261 --------------------------------------
262 candidates :: [PhyloGroup]
263 candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
264 --------------------------------------
265 scores :: [(PhyloGroupId, Double)]
266 scores = map (\group' -> getProximity (prox,param) group group') candidates
267 --------------------------------------
268 best :: [(PhyloGroupId, Double)]
269 best = reverse
270 $ sortOn snd
271 $ filter (\(id,score) -> score >= thr) scores
272 --------------------------------------
273
274
275 -- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
276 makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
277 makePair to group ids = case to of
278 Childs -> over (phylo_groupPeriodChilds) addPointers group
279 Parents -> over (phylo_groupPeriodParents) addPointers group
280 _ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
281 where
282 --------------------------------------
283 addPointers :: [Pointer] -> [Pointer]
284 addPointers l = nub $ (l ++ ids)
285 --------------------------------------
286
287
288 -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
289 pairGroupsToGroups :: PairTo -> Level -> Double -> (Proximity,[Double]) -> Phylo -> Phylo
290 pairGroupsToGroups to lvl thr (prox,param) p = alterPhyloGroups
291 (\groups ->
292 map (\group ->
293 if (getGroupLevel group) == (getLevelValue lvl)
294 then
295 let
296 --------------------------------------
297 candidates :: [(PhyloGroupId, Double)]
298 candidates = findBestCandidates to 1 5 thr (prox,param) group p
299 --------------------------------------
300 in
301 makePair to group candidates
302 else
303 group ) groups) p
304
305
306 phyloWithPair_1_Childs :: Phylo
307 phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 (WeightedLogJaccard,[0]) phyloWithPair_1_Parents
308
309
310 phyloWithPair_1_Parents :: Phylo
311 phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 (WeightedLogJaccard,[0]) phyloLinked_0_1
312
313
314 ------------------------------------------------------------------------
315 -- | STEP 10 | -- Build the coocurency Matrix of the Phylo
316
317
318 -- | Are two PhyloGroups sharing at leats one Ngrams
319 shareNgrams :: PhyloGroup -> PhyloGroup -> Bool
320 shareNgrams g g' = (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g')
321
322
323 -- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
324 getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
325 getKeyPair (x,y) m = case findPair (x,y) m of
326 Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
327 Just i -> i
328 where
329 --------------------------------------
330 findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
331 findPair (x,y) m
332 | member (x,y) m = Just (x,y)
333 | member (y,x) m = Just (y,x)
334 | otherwise = Nothing
335 --------------------------------------
336
337
338 -- | To transform the Fis into a coocurency Matrix in a Phylo
339 fisToCooc :: Map (Date, Date) Fis -> Phylo -> Map (Int, Int) Double
340 fisToCooc m p = map (/docs)
341 $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
342 $ concat
343 $ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x) fis
344 where
345 --------------------------------------
346 fis :: [(Clique,Support)]
347 fis = concat $ map (\x -> Map.toList x) (elems m)
348 --------------------------------------
349 fisNgrams :: [Ngrams]
350 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis
351 --------------------------------------
352 docs :: Double
353 docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis
354 --------------------------------------
355 cooc :: Map (Int, Int) (Double)
356 cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
357 --------------------------------------
358
359
360 phyloCooc :: Map (Int, Int) Double
361 phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
362
363
364 ------------------------------------------------------------------------
365 -- | STEP 9 | -- Build level 1 of the Phylo
366
367
368 -- | To Cliques into Groups
369 cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) Fis -> Phylo -> PhyloGroup
370 cliqueToGroup period lvl idx label fis m p = PhyloGroup ((period, lvl), idx)
371 label
372 ngrams
373 (singleton "support" (fromIntegral $ snd fis))
374 cooc
375 [] [] [] []
376 where
377 --------------------------------------
378 ngrams :: [Int]
379 ngrams = sort $ map (\x -> ngramsToIdx x p)
380 $ Set.toList
381 $ fst fis
382 --------------------------------------
383 cooc :: Map (Int, Int) Double
384 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
385 $ fisToCooc (restrictKeys m $ Set.fromList [period]) p
386 --------------------------------------
387
388
389 -- | To transform Fis into PhyloLevels
390 fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo
391 fisToPhyloLevel m p = over (phylo_periods . traverse)
392 (\period ->
393 let periodId = _phylo_periodId period
394 fisList = zip [1..] (Map.toList (m ! periodId))
395 in over (phylo_periodLevels)
396 (\levels ->
397 let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
398 in levels ++ [PhyloLevel (periodId, 1) groups]
399 ) period ) p
400
401
402 phyloLinked_0_1 :: Phylo
403 phyloLinked_0_1 = alterLevelLinks lvl_0_1 phyloLinked_1_0
404
405
406 lvl_0_1 :: LevelLink
407 lvl_0_1 = initLevelLink (initLevel 0 Level_0) (initLevel 1 Level_1)
408
409
410 phyloLinked_1_0 :: Phylo
411 phyloLinked_1_0 = alterLevelLinks lvl_1_0 phyloWithGroups1
412
413
414 lvl_1_0 :: LevelLink
415 lvl_1_0 = initLevelLink (initLevel 1 Level_1) (initLevel 0 Level_0)
416
417
418 phyloWithGroups1 :: Phylo
419 phyloWithGroups1 = updatePhyloByLevel (initLevel 1 Level_1) phyloLinked_m1_0
420
421
422 ------------------------------------------------------------------------
423 -- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
424
425
426 -- | To Filter Fis by support
427 filterFisBySupport :: Bool -> Int -> Map (Date, Date) Fis -> Map (Date, Date) Fis
428 filterFisBySupport empty min m = case empty of
429 True -> Map.map (\fis -> filterMinorFis min fis) m
430 False -> Map.map (\fis -> filterMinorFisNonEmpty min fis) m
431
432
433 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
434 filterMinorFis :: Int -> Fis -> Fis
435 filterMinorFis min fis = Map.filter (\s -> s > min) fis
436
437
438 -- | To filter Fis with small Support but by keeping non empty Periods
439 filterMinorFisNonEmpty :: Int -> Fis -> Fis
440 filterMinorFisNonEmpty min fis = if (Map.null fis') && (not $ Map.null fis)
441 then filterMinorFisNonEmpty (min - 1) fis
442 else fis'
443 where
444 --------------------------------------
445 fis' :: Fis
446 fis' = filterMinorFis min fis
447 --------------------------------------
448
449
450 -- | To filter nested Fis
451 filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis
452 filterFisByNested = map (\fis -> restrictKeys fis
453 $ Set.fromList
454 $ filterNestedSets (head (keys fis)) (keys fis) []
455 )
456
457
458 -- | To transform a list of Documents into a Frequent Items Set
459 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) Fis
460 docsToFis docs = map (\d -> fisWithSizePolyMap
461 (Segment 1 20)
462 1
463 (map (words . text) d)) docs
464
465
466 phyloFisFiltered :: Map (Date, Date) Fis
467 phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
468
469
470 phyloFis :: Map (Date, Date) Fis
471 phyloFis = docsToFis phyloPeriods
472
473
474 ------------------------------------------------------------------------
475 -- | STEP 7 | -- Link level -1 to level 0
476
477
478 phyloLinked_m1_0 :: Phylo
479 phyloLinked_m1_0 = alterLevelLinks lvl_m1_0 phyloLinked_0_m1
480
481
482 lvl_m1_0 :: LevelLink
483 lvl_m1_0 = initLevelLink (initLevel (-1) Level_m1) (initLevel 0 Level_0)
484
485
486 ------------------------------------------------------------------------
487 -- | STEP 6 | -- Link level 0 to level -1
488
489
490 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
491 linkGroupToGroups :: LevelLink -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
492 linkGroupToGroups lvl current targets
493 | getLevelLinkValue From lvl < getLevelLinkValue To lvl = setLevelParents current
494 | getLevelLinkValue From lvl > getLevelLinkValue To lvl = setLevelChilds current
495 | otherwise = current
496 where
497 --------------------------------------
498 setLevelChilds :: PhyloGroup -> PhyloGroup
499 setLevelChilds = over (phylo_groupLevelChilds) addPointers
500 --------------------------------------
501 setLevelParents :: PhyloGroup -> PhyloGroup
502 setLevelParents = over (phylo_groupLevelParents) addPointers
503 --------------------------------------
504 addPointers :: [Pointer] -> [Pointer]
505 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
506 if shouldLink lvl (_phylo_groupNgrams current)
507 (_phylo_groupNgrams target )
508 then Just ((getGroupId target),1)
509 else Nothing) targets
510 --------------------------------------
511
512
513 -- | To set the LevelLinks between two lists of PhyloGroups
514 linkGroupsByLevel :: LevelLink -> Phylo -> [PhyloGroup] -> [PhyloGroup]
515 linkGroupsByLevel lvl p groups = map (\group ->
516 if getGroupLevel group == getLevelLinkValue From lvl
517 then linkGroupToGroups lvl group (getGroupsWithFilters (getLevelLinkValue To lvl) (getGroupPeriod group) p)
518 else group ) groups
519
520
521 -- | To set the LevelLink of all the PhyloGroups of a Phylo
522 alterLevelLinks :: LevelLink -> Phylo -> Phylo
523 alterLevelLinks lvl p = alterPhyloGroups (linkGroupsByLevel lvl p) p
524
525
526 phyloLinked_0_m1 :: Phylo
527 phyloLinked_0_m1 = alterLevelLinks lvl_0_m1 phyloWithGroups0
528
529
530 lvl_0_m1 :: LevelLink
531 lvl_0_m1 = initLevelLink (initLevel 0 Level_0) (initLevel (-1) Level_m1)
532
533
534 ------------------------------------------------------------------------
535 -- | STEP 5 | -- Build level 0 as a copy of level -1
536
537
538 -- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
539 clonePhyloLevel :: Int -> Phylo -> Phylo
540 clonePhyloLevel lvl p = alterPhyloLevels (\l -> addPhyloLevel
541 (setPhyloLevelId lvl $ head l)
542 l) p
543
544
545 phyloWithGroups0 :: Phylo
546 phyloWithGroups0 = updatePhyloByLevel (initLevel 0 Level_0) phyloWithGroupsm1
547
548
549 ------------------------------------------------------------------------
550 -- | STEP 4 | -- Build level -1
551
552
553 -- | To transform a list of Documents into a PhyloLevel
554 docsToPhyloLevel :: Int ->(Date, Date) -> [Document] -> Phylo -> PhyloLevel
555 docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
556 ((d, d'), lvl)
557 (map (\(f,s) -> initGroup [s] s f lvl d d' p)
558 $ zip [1..]
559 $ (nub . concat)
560 $ map (words . text) docs)
561
562
563 -- | To transform a Map of Periods and Documents into a list of PhyloPeriods
564 docsToPhyloPeriods :: Int -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
565 docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
566 $ Map.toList levels
567 where
568 --------------------------------------
569 levels :: Map (Date,Date) [PhyloLevel]
570 levels = mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
571 --------------------------------------
572
573
574 -- | To update a Phylo for a given Levels
575 updatePhyloByLevel :: Level -> Phylo -> Phylo
576 updatePhyloByLevel lvl p
577 = case getLevelLabel lvl of
578
579 Level_m1 -> appendPhyloPeriods (docsToPhyloPeriods (getLevelValue lvl) lvlData p) p
580 where
581 --------------------------------------
582 lvlData :: Map (Date,Date) [Document]
583 lvlData = phyloPeriods
584 --------------------------------------
585
586 Level_0 -> clonePhyloLevel (getLevelValue lvl) p
587
588 Level_1 -> fisToPhyloLevel lvlData p
589 where
590 --------------------------------------
591 lvlData :: Map (Date, Date) Fis
592 lvlData = phyloFisFiltered
593 --------------------------------------
594
595 _ -> panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
596
597
598 phyloWithGroupsm1 :: Phylo
599 phyloWithGroupsm1 = updatePhyloByLevel (initLevel (-1) Level_m1) phylo
600
601
602 ------------------------------------------------------------------------
603 -- | STEP 3 | -- Parse the Documents and group them by Periods
604
605
606 -- | To init a set of periods out of a given Grain and Step
607 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
608 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
609 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
610 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
611 where
612 --------------------------------------
613 hs = steps g s $ both f (head es, last es)
614 --------------------------------------
615 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
616 inPeriode f' h (start,end) =
617 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
618 --------------------------------------
619 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
620 steps s' o' (start,end) = map (\l -> (head l, last l))
621 $ chunkAlong s' o' [start .. end]
622 --------------------------------------
623
624
625 -- | To parse a list of Documents by filtering on a Vector of Ngrams
626 parseDocs :: PhyloNgrams -> [Document] -> [Document]
627 parseDocs l docs = map (\(Document d t) -> Document d (unwords
628 $ filter (\x -> Vector.elem x l)
629 $ monoTexts t)) docs
630
631
632 -- | To group a list of Documents by fixed periods
633 groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
634 groupDocsByPeriod g s docs p = docsToPeriods date g s
635 $ parseDocs (getPhyloNgrams p) docs
636
637
638 phyloPeriods :: Map (Date, Date) [Document]
639 phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
640
641
642 ------------------------------------------------------------------------
643 -- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
644
645
646 -- | To init a Phylomemy
647 initPhylo :: [Document] -> PhyloNgrams -> Phylo
648 initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] []
649
650
651 -- | To init a PhyloNgrams as a Vector of Ngrams
652 initNgrams :: [Ngrams] -> PhyloNgrams
653 initNgrams l = Vector.fromList $ map toLower l
654
655
656 phylo :: Phylo
657 phylo = initPhylo phyloDocs (initNgrams actants)
658
659
660 ------------------------------------------------------------------------
661 -- | STEP 1 | -- Get a list of Document
662
663
664 -- | To transform a corpus of texts into a structured list of Documents
665 corpusToDocs :: [(Date, Text)] -> [Document]
666 corpusToDocs l = map (\(d,t) -> Document d t) l
667
668
669 phyloDocs :: [Document]
670 phyloDocs = corpusToDocs corpus
671
672
673 ------------------------------------------------------------------------
674 -- | STEP 0 | -- Let's start with an example
675
676
677 actants :: [Ngrams]
678 actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
679 , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
680 , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
681
682
683 corpus :: [(Date, Text)]
684 corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]