]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Example.hs
Add the first steps the clustering
[gargantext.git] / src / Gargantext / Viz / Phylo / Example.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Example
3 Description : Phylomemy example based on history of Cleopatre.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -- | Cesar et Cleôpatre
11 -- Exemple de phylomemie
12 -- French without accents
13
14
15 TODO:
16 - split the functions : RAW -> Document -> Ngrams
17
18 -- reverse history: antechronologique
19 -- metrics support
20
21
22 -}
23
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE FlexibleContexts #-}
26 {-# LANGUAGE OverloadedStrings #-}
27
28 module Gargantext.Viz.Phylo.Example where
29
30 import Control.Lens hiding (makeLenses, both, Level)
31
32 import Data.Bool (Bool, not)
33 import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete, init)
34 import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith)
35 import Data.Semigroup (Semigroup)
36 import Data.Set (Set)
37 import Data.Text (Text, unwords, toLower, words)
38 import Data.Tuple (fst, snd)
39 import Data.Tuple.Extra
40 import Data.Vector (Vector, fromList, elemIndex)
41
42 import Gargantext.Prelude hiding (head)
43 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
44 import Gargantext.Text.Terms.Mono (monoTexts)
45 import Gargantext.Viz.Phylo
46 import Gargantext.Viz.Phylo.Tools
47
48 import qualified Data.Bool as Bool
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Maybe as Maybe
52 import qualified Data.Set as Set
53 import qualified Data.Tuple as Tuple
54 import qualified Data.Vector as Vector
55
56
57 ------------------------------------------------------------------------
58 -- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
59
60
61 ------------------------------------------------------------------------
62 -- | STEP 13 | -- Cluster the Fis
63
64
65 -- Il faudrait plutôt passer (Proximity,[Double]) où [Double] serait la liste des paramètres
66 groupsToGraph :: Proximity -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
67 groupsToGraph prox groups = case prox of
68 WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard 0 (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) edges
69 _ -> undefined
70 where
71 edges :: [(PhyloGroup,PhyloGroup)]
72 edges = listToDirectedCombi groups
73
74
75 phyloToGraphs :: Level -> Proximity -> Phylo -> Map (Date,Date) [((PhyloGroup,PhyloGroup),Double)]
76 phyloToGraphs lvl prox p = Map.fromList
77 $ zip periods
78 (map (\prd -> groupsToGraph prox
79 $ getGroupsWithFilters (getLevelValue lvl) prd p) periods)
80 where
81 --------------------------------------
82 periods :: [PhyloPeriodId]
83 periods = getPhyloPeriods p
84 --------------------------------------
85
86
87 ------------------------------------------------------------------------
88 -- | STEP 12 | -- Find the Branches
89
90
91 -- | To add a PhyloGroupId to list of Branches with conditions
92 addToBranches :: (Int,Int) -> PhyloGroupId -> [PhyloBranch] -> [PhyloBranch]
93 addToBranches (lvl,idx) id branches
94 | null branches = [newBranch]
95 | idx == lastIdx = (init branches) ++ [addGroupIdToBranch id (last branches)]
96 | otherwise = branches ++ [newBranch]
97 where
98 --------------------------------------
99 newBranch :: PhyloBranch
100 newBranch = PhyloBranch (lvl,idx) "" [id]
101 --------------------------------------
102 lastIdx :: Int
103 lastIdx = (snd . _phylo_branchId . last) branches
104 --------------------------------------
105
106
107 -- | To transform a list of PhyloGroups into a list of PhyloBranches where :
108 -- curr = the current PhyloGroup
109 -- rest = the rest of the initial list of PhyloGroups
110 -- next = the next PhyloGroups to be added in the current Branch
111 -- memo = the memory of the allready created Branches, the last one is the current one
112 groupsToBranches :: (Int,Int) -> PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloBranch] -> Phylo -> [PhyloBranch]
113 groupsToBranches (lvl,idx) curr rest next memo p
114 | null rest' && null next' = memo'
115 | (not . null) next' = groupsToBranches (lvl,idx) (head next') rest' (tail next') memo' p
116 | otherwise = groupsToBranches (lvl,idx + 1) (head rest') (tail rest') [] memo' p
117 where
118 --------------------------------------
119 done :: [PhyloGroup]
120 done = getGroupsFromIds (concat $ map (_phylo_branchGroups) memo) p
121 --------------------------------------
122 memo' :: [PhyloBranch]
123 memo' = addToBranches (lvl,idx) (getGroupId curr) memo
124 --------------------------------------
125 next' :: [PhyloGroup]
126 next' = filter (\x -> not $ elem x done) $ nub $ next ++ (getGroupPairs curr p)
127 --------------------------------------
128 rest' :: [PhyloGroup]
129 rest' = filter (\x -> not $ elem x next') rest
130 --------------------------------------
131
132
133 -- | To set all the PhyloBranches for a given Level in a Phylo
134 setPhyloBranches :: Level -> Phylo -> Phylo
135 setPhyloBranches lvl p = alterPhyloBranches
136 (\branches -> branches ++ (groupsToBranches
137 (getLevelValue lvl, 0)
138 (head groups)
139 (tail groups)
140 [] [] p)
141 ) p
142 where
143 --------------------------------------
144 groups :: [PhyloGroup]
145 groups = getGroupsWithLevel (getLevelValue lvl) p
146 --------------------------------------
147
148
149 phyloWithBranches_1 = setPhyloBranches (initLevel 1 Level_1) phyloWithPair_1_Childs
150
151
152 ------------------------------------------------------------------------
153 -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
154
155
156 -- | To process the weightedLogJaccard between two PhyloGroups fields
157 weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
158 weightedLogJaccard s f1 f2
159 | null wUnion = 0
160 | wUnion == wInter = 1
161 | s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
162 | s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
163 | otherwise = (sumLog wInter)/(sumLog wUnion)
164 where
165 --------------------------------------
166 wInter :: [Double]
167 wInter = elems $ intersectionWith (+) f1 f2
168 --------------------------------------
169 wUnion :: [Double]
170 wUnion = elems $ unionWith (+) f1 f2
171 --------------------------------------
172 sumInvLog :: [Double] -> Double
173 sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
174 --------------------------------------
175 sumLog :: [Double] -> Double
176 sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
177 --------------------------------------
178
179
180 -- | To apply the corresponding proximity function based on a given Proximity
181 getProximity :: Proximity -> Double -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
182 getProximity prox s g1 g2 = case prox of
183 WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard s (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
184 Other -> undefined
185 _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
186
187
188 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
189 getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
190 getNextPeriods to id l = case to of
191 Childs -> unNested id ((tail . snd) next)
192 Parents -> unNested id ((reverse . fst) next)
193 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
194 where
195 --------------------------------------
196 next :: ([PhyloPeriodId], [PhyloPeriodId])
197 next = splitAt idx l
198 --------------------------------------
199 idx :: Int
200 idx = case (List.elemIndex id l) of
201 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
202 Just i -> i
203 --------------------------------------
204 -- | To have an non-overlapping next period
205 unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
206 unNested x l
207 | null l = []
208 | nested (fst $ head l) x = unNested x (tail l)
209 | nested (snd $ head l) x = unNested x (tail l)
210 | otherwise = l
211 --------------------------------------
212 nested :: Date -> PhyloPeriodId -> Bool
213 nested d prd = d >= fst prd && d <= snd prd
214 --------------------------------------
215
216
217 -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
218 findBestCandidates :: PairTo -> Int -> Int -> Double -> Double -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
219 findBestCandidates to depth max thr s group p
220 | depth > max || null next = []
221 | (not . null) best = take 2 best
222 | otherwise = findBestCandidates to (depth + 1) max thr s group p
223 where
224 --------------------------------------
225 next :: [PhyloPeriodId]
226 next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
227 --------------------------------------
228 candidates :: [PhyloGroup]
229 candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
230 --------------------------------------
231 scores :: [(PhyloGroupId, Double)]
232 scores = map (\group' -> getProximity WeightedLogJaccard s group group') candidates
233 --------------------------------------
234 best :: [(PhyloGroupId, Double)]
235 best = reverse
236 $ sortOn snd
237 $ filter (\(id,score) -> score >= thr) scores
238 --------------------------------------
239
240
241 -- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
242 makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
243 makePair to group ids = case to of
244 Childs -> over (phylo_groupPeriodChilds) addPointers group
245 Parents -> over (phylo_groupPeriodParents) addPointers group
246 _ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
247 where
248 --------------------------------------
249 addPointers :: [Pointer] -> [Pointer]
250 addPointers l = nub $ (l ++ ids)
251 --------------------------------------
252
253
254 -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
255 pairGroupsToGroups :: PairTo -> Level -> Double -> Double -> Phylo -> Phylo
256 pairGroupsToGroups to lvl thr s p = alterPhyloGroups
257 (\groups ->
258 map (\group ->
259 if (getGroupLevel group) == (getLevelValue lvl)
260 then
261 let
262 --------------------------------------
263 candidates :: [(PhyloGroupId, Double)]
264 candidates = findBestCandidates to 1 5 thr s group p
265 --------------------------------------
266 in
267 makePair to group candidates
268 else
269 group ) groups) p
270
271
272 phyloWithPair_1_Childs :: Phylo
273 phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 0 phyloWithPair_1_Parents
274
275
276 phyloWithPair_1_Parents :: Phylo
277 phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 0 phyloLinked_0_1
278
279
280 ------------------------------------------------------------------------
281 -- | STEP 10 | -- Build the coocurency Matrix of the Phylo
282
283
284 -- | Are two PhyloGroups sharing at leats one Ngrams
285 shareNgrams :: PhyloGroup -> PhyloGroup -> Bool
286 shareNgrams g g' = (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g')
287
288
289 -- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
290 getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
291 getKeyPair (x,y) m = case findPair (x,y) m of
292 Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
293 Just i -> i
294 where
295 --------------------------------------
296 findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
297 findPair (x,y) m
298 | member (x,y) m = Just (x,y)
299 | member (y,x) m = Just (y,x)
300 | otherwise = Nothing
301 --------------------------------------
302
303
304 -- | To transform the Fis into a coocurency Matrix in a Phylo
305 fisToCooc :: Map (Date, Date) Fis -> Phylo -> Map (Int, Int) Double
306 fisToCooc m p = map (/docs)
307 $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
308 $ concat
309 $ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x) fis
310 where
311 --------------------------------------
312 fis :: [(Clique,Support)]
313 fis = concat $ map (\x -> Map.toList x) (elems m)
314 --------------------------------------
315 fisNgrams :: [Ngrams]
316 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis
317 --------------------------------------
318 docs :: Double
319 docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis
320 --------------------------------------
321 cooc :: Map (Int, Int) (Double)
322 cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
323 --------------------------------------
324
325
326 phyloCooc :: Map (Int, Int) Double
327 phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
328
329
330 ------------------------------------------------------------------------
331 -- | STEP 9 | -- Build level 1 of the Phylo
332
333
334 -- | To Cliques into Groups
335 cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) Fis -> Phylo -> PhyloGroup
336 cliqueToGroup period lvl idx label fis m p = PhyloGroup ((period, lvl), idx)
337 label
338 ngrams
339 (singleton "support" (fromIntegral $ snd fis))
340 cooc
341 [] [] [] []
342 where
343 --------------------------------------
344 ngrams :: [Int]
345 ngrams = sort $ map (\x -> ngramsToIdx x p)
346 $ Set.toList
347 $ fst fis
348 --------------------------------------
349 cooc :: Map (Int, Int) Double
350 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
351 $ fisToCooc (restrictKeys m $ Set.fromList [period]) p
352 --------------------------------------
353
354
355 -- | To transform Fis into PhyloLevels
356 fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo
357 fisToPhyloLevel m p = over (phylo_periods . traverse)
358 (\period ->
359 let periodId = _phylo_periodId period
360 fisList = zip [1..] (Map.toList (m ! periodId))
361 in over (phylo_periodLevels)
362 (\levels ->
363 let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
364 in levels ++ [PhyloLevel (periodId, 1) groups]
365 ) period ) p
366
367
368 phyloLinked_0_1 :: Phylo
369 phyloLinked_0_1 = alterLevelLinks lvl_0_1 phyloLinked_1_0
370
371
372 lvl_0_1 :: LevelLink
373 lvl_0_1 = initLevelLink (initLevel 0 Level_0) (initLevel 1 Level_1)
374
375
376 phyloLinked_1_0 :: Phylo
377 phyloLinked_1_0 = alterLevelLinks lvl_1_0 phyloWithGroups1
378
379
380 lvl_1_0 :: LevelLink
381 lvl_1_0 = initLevelLink (initLevel 1 Level_1) (initLevel 0 Level_0)
382
383
384 phyloWithGroups1 :: Phylo
385 phyloWithGroups1 = updatePhyloByLevel (initLevel 1 Level_1) phyloLinked_m1_0
386
387
388 ------------------------------------------------------------------------
389 -- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
390
391
392 -- | To Filter Fis by support
393 filterFisBySupport :: Bool -> Int -> Map (Date, Date) Fis -> Map (Date, Date) Fis
394 filterFisBySupport empty min m = case empty of
395 True -> Map.map (\fis -> filterMinorFis min fis) m
396 False -> Map.map (\fis -> filterMinorFisNonEmpty min fis) m
397
398
399 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
400 filterMinorFis :: Int -> Fis -> Fis
401 filterMinorFis min fis = Map.filter (\s -> s > min) fis
402
403
404 -- | To filter Fis with small Support but by keeping non empty Periods
405 filterMinorFisNonEmpty :: Int -> Fis -> Fis
406 filterMinorFisNonEmpty min fis = if (Map.null fis') && (not $ Map.null fis)
407 then filterMinorFisNonEmpty (min - 1) fis
408 else fis'
409 where
410 --------------------------------------
411 fis' :: Fis
412 fis' = filterMinorFis min fis
413 --------------------------------------
414
415
416 -- | To filter nested Fis
417 filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis
418 filterFisByNested = map (\fis -> restrictKeys fis
419 $ Set.fromList
420 $ filterNestedSets (head (keys fis)) (keys fis) []
421 )
422
423
424 -- | To transform a list of Documents into a Frequent Items Set
425 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) Fis
426 docsToFis docs = map (\d -> fisWithSizePolyMap
427 (Segment 1 20)
428 1
429 (map (words . text) d)) docs
430
431
432 phyloFisFiltered :: Map (Date, Date) Fis
433 phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
434
435
436 phyloFis :: Map (Date, Date) Fis
437 phyloFis = docsToFis phyloPeriods
438
439
440 ------------------------------------------------------------------------
441 -- | STEP 7 | -- Link level -1 to level 0
442
443
444 phyloLinked_m1_0 :: Phylo
445 phyloLinked_m1_0 = alterLevelLinks lvl_m1_0 phyloLinked_0_m1
446
447
448 lvl_m1_0 :: LevelLink
449 lvl_m1_0 = initLevelLink (initLevel (-1) Level_m1) (initLevel 0 Level_0)
450
451
452 ------------------------------------------------------------------------
453 -- | STEP 6 | -- Link level 0 to level -1
454
455
456 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
457 linkGroupToGroups :: LevelLink -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
458 linkGroupToGroups lvl current targets
459 | getLevelLinkValue From lvl < getLevelLinkValue To lvl = setLevelParents current
460 | getLevelLinkValue From lvl > getLevelLinkValue To lvl = setLevelChilds current
461 | otherwise = current
462 where
463 --------------------------------------
464 setLevelChilds :: PhyloGroup -> PhyloGroup
465 setLevelChilds = over (phylo_groupLevelChilds) addPointers
466 --------------------------------------
467 setLevelParents :: PhyloGroup -> PhyloGroup
468 setLevelParents = over (phylo_groupLevelParents) addPointers
469 --------------------------------------
470 addPointers :: [Pointer] -> [Pointer]
471 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
472 if shouldLink lvl (_phylo_groupNgrams current)
473 (_phylo_groupNgrams target )
474 then Just ((getGroupId target),1)
475 else Nothing) targets
476 --------------------------------------
477
478
479 -- | To set the LevelLinks between two lists of PhyloGroups
480 linkGroupsByLevel :: LevelLink -> Phylo -> [PhyloGroup] -> [PhyloGroup]
481 linkGroupsByLevel lvl p groups = map (\group ->
482 if getGroupLevel group == getLevelLinkValue From lvl
483 then linkGroupToGroups lvl group (getGroupsWithFilters (getLevelLinkValue To lvl) (getGroupPeriod group) p)
484 else group ) groups
485
486
487 -- | To set the LevelLink of all the PhyloGroups of a Phylo
488 alterLevelLinks :: LevelLink -> Phylo -> Phylo
489 alterLevelLinks lvl p = alterPhyloGroups (linkGroupsByLevel lvl p) p
490
491
492 phyloLinked_0_m1 :: Phylo
493 phyloLinked_0_m1 = alterLevelLinks lvl_0_m1 phyloWithGroups0
494
495
496 lvl_0_m1 :: LevelLink
497 lvl_0_m1 = initLevelLink (initLevel 0 Level_0) (initLevel (-1) Level_m1)
498
499
500 ------------------------------------------------------------------------
501 -- | STEP 5 | -- Build level 0 as a copy of level -1
502
503
504 -- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
505 clonePhyloLevel :: Int -> Phylo -> Phylo
506 clonePhyloLevel lvl p = alterPhyloLevels (\l -> addPhyloLevel
507 (setPhyloLevelId lvl $ head l)
508 l) p
509
510
511 phyloWithGroups0 :: Phylo
512 phyloWithGroups0 = updatePhyloByLevel (initLevel 0 Level_0) phyloWithGroupsm1
513
514
515 ------------------------------------------------------------------------
516 -- | STEP 4 | -- Build level -1
517
518
519 -- | To transform a list of Documents into a PhyloLevel
520 docsToPhyloLevel :: Int ->(Date, Date) -> [Document] -> Phylo -> PhyloLevel
521 docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
522 ((d, d'), lvl)
523 (map (\(f,s) -> initGroup [s] s f lvl d d' p)
524 $ zip [1..]
525 $ (nub . concat)
526 $ map (words . text) docs)
527
528
529 -- | To transform a Map of Periods and Documents into a list of PhyloPeriods
530 docsToPhyloPeriods :: Int -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
531 docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
532 $ Map.toList levels
533 where
534 --------------------------------------
535 levels :: Map (Date,Date) [PhyloLevel]
536 levels = mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
537 --------------------------------------
538
539
540 -- | To update a Phylo for a given Levels
541 updatePhyloByLevel :: Level -> Phylo -> Phylo
542 updatePhyloByLevel lvl p
543 = case getLevelLabel lvl of
544
545 Level_m1 -> appendPhyloPeriods (docsToPhyloPeriods (getLevelValue lvl) lvlData p) p
546 where
547 --------------------------------------
548 lvlData :: Map (Date,Date) [Document]
549 lvlData = phyloPeriods
550 --------------------------------------
551
552 Level_0 -> clonePhyloLevel (getLevelValue lvl) p
553
554 Level_1 -> fisToPhyloLevel lvlData p
555 where
556 --------------------------------------
557 lvlData :: Map (Date, Date) Fis
558 lvlData = phyloFisFiltered
559 --------------------------------------
560
561 _ -> panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
562
563
564 phyloWithGroupsm1 :: Phylo
565 phyloWithGroupsm1 = updatePhyloByLevel (initLevel (-1) Level_m1) phylo
566
567
568 ------------------------------------------------------------------------
569 -- | STEP 3 | -- Parse the Documents and group them by Periods
570
571
572 -- | To init a set of periods out of a given Grain and Step
573 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
574 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
575 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
576 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
577 where
578 --------------------------------------
579 hs = steps g s $ both f (head es, last es)
580 --------------------------------------
581 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
582 inPeriode f' h (start,end) =
583 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
584 --------------------------------------
585 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
586 steps s' o' (start,end) = map (\l -> (head l, last l))
587 $ chunkAlong s' o' [start .. end]
588 --------------------------------------
589
590
591 -- | To parse a list of Documents by filtering on a Vector of Ngrams
592 parseDocs :: PhyloNgrams -> [Document] -> [Document]
593 parseDocs l docs = map (\(Document d t) -> Document d (unwords
594 $ filter (\x -> Vector.elem x l)
595 $ monoTexts t)) docs
596
597
598 -- | To group a list of Documents by fixed periods
599 groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
600 groupDocsByPeriod g s docs p = docsToPeriods date g s
601 $ parseDocs (getPhyloNgrams p) docs
602
603
604 phyloPeriods :: Map (Date, Date) [Document]
605 phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
606
607
608 ------------------------------------------------------------------------
609 -- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
610
611
612 -- | To init a Phylomemy
613 initPhylo :: [Document] -> PhyloNgrams -> Phylo
614 initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] []
615
616
617 -- | To init a PhyloNgrams as a Vector of Ngrams
618 initNgrams :: [Ngrams] -> PhyloNgrams
619 initNgrams l = Vector.fromList $ map toLower l
620
621
622 phylo :: Phylo
623 phylo = initPhylo phyloDocs (initNgrams actants)
624
625
626 ------------------------------------------------------------------------
627 -- | STEP 1 | -- Get a list of Document
628
629
630 -- | To transform a corpus of texts into a structured list of Documents
631 corpusToDocs :: [(Date, Text)] -> [Document]
632 corpusToDocs l = map (\(d,t) -> Document d t) l
633
634
635 phyloDocs :: [Document]
636 phyloDocs = corpusToDocs corpus
637
638
639 ------------------------------------------------------------------------
640 -- | STEP 0 | -- Let's start with an example
641
642
643 actants :: [Ngrams]
644 actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
645 , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
646 , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
647
648
649 corpus :: [(Date, Text)]
650 corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]