]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Example.hs
Add the branches detection
[gargantext.git] / src / Gargantext / Viz / Phylo / Example.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Example
3 Description : Phylomemy example based on history of Cleopatre.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -- | Cesar et Cleôpatre
11 -- Exemple de phylomemie
12 -- French without accents
13
14
15 TODO:
16 - split the functions : RAW -> Document -> Ngrams
17
18 -- reverse history: antechronologique
19 -- metrics support
20
21
22 -}
23
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE FlexibleContexts #-}
26 {-# LANGUAGE OverloadedStrings #-}
27
28 module Gargantext.Viz.Phylo.Example where
29
30 import Control.Lens hiding (makeLenses, both, Level)
31
32 import Data.Bool (Bool, not)
33 import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete, init)
34 import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith)
35 import Data.Semigroup (Semigroup)
36 import Data.Set (Set)
37 import Data.Text (Text, unwords, toLower, words)
38 import Data.Tuple (fst, snd)
39 import Data.Tuple.Extra
40 import Data.Vector (Vector, fromList, elemIndex)
41
42 import Gargantext.Prelude hiding (head)
43 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
44 import Gargantext.Text.Terms.Mono (monoTexts)
45 import Gargantext.Viz.Phylo
46 import Gargantext.Viz.Phylo.Tools
47
48 import qualified Data.Bool as Bool
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Maybe as Maybe
52 import qualified Data.Set as Set
53 import qualified Data.Tuple as Tuple
54 import qualified Data.Vector as Vector
55
56
57 ------------------------------------------------------------------------
58 -- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
59
60
61 ------------------------------------------------------------------------
62 -- | STEP 13 | -- Cluster the Fis
63
64
65 ------------------------------------------------------------------------
66 -- | STEP 12 | -- Find the Branches
67
68
69 -- | To add a PhyloGroupId to list of Branches with conditions
70 addToBranches :: (Int,Int) -> PhyloGroupId -> [PhyloBranch] -> [PhyloBranch]
71 addToBranches (lvl,idx) id branches
72 | null branches = [newBranch]
73 | idx == lastIdx = (init branches) ++ [addGroupIdToBranch id (last branches)]
74 | otherwise = branches ++ [newBranch]
75 where
76 --------------------------------------
77 newBranch :: PhyloBranch
78 newBranch = PhyloBranch (lvl,idx) "" [id]
79 --------------------------------------
80 lastIdx :: Int
81 lastIdx = (snd . _phylo_branchId . last) branches
82 --------------------------------------
83
84
85 -- | To transform a list of PhyloGroups into a list of PhyloBranches where :
86 -- curr = the current PhyloGroup
87 -- rest = the rest of the initial list of PhyloGroups
88 -- next = the next PhyloGroups to be added in the current Branch
89 -- memo = the memory of the allready created Branches, the last one is the current one
90 groupsToBranches :: (Int,Int) -> PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloBranch] -> Phylo -> [PhyloBranch]
91 groupsToBranches (lvl,idx) curr rest next memo p
92 | null rest' && null next' = memo'
93 | (not . null) next' = groupsToBranches (lvl,idx) (head next') rest' (tail next') memo' p
94 | otherwise = groupsToBranches (lvl,idx + 1) (head rest') (tail rest') [] memo' p
95 where
96 --------------------------------------
97 done :: [PhyloGroups]
98 done = getGroupsFromIds (concat $ map (_phylo_branchGroups) memo) p
99 --------------------------------------
100 memo' :: [PhyloBranch]
101 memo' = addToBranches (lvl,idx) (getGroupId curr) memo
102 --------------------------------------
103 next' :: [PhyloGroups]
104 next' = filter (\x -> not $ elem x done) $ nub $ next ++ (getGroupPairs curr p)
105 --------------------------------------
106 rest' :: [PhyloGroups]
107 rest' = filter (\x -> not $ elem x next') rest
108 --------------------------------------
109
110
111 -- | To set all the PhyloBranches for a given Level in a Phylo
112 setPhyloBranches :: Level -> Phylo -> Phylo
113 setPhyloBranches lvl p = alterPhyloBranches
114 (\branches -> branches ++ (groupsToBranches
115 (getLevelValue lvl, 0)
116 (head groups)
117 (tail groups)
118 [] [] p)
119 ) p
120 where
121 --------------------------------------
122 groups :: [PhyloGroup]
123 groups = getGroupsWithLevel (getLevelValue lvl) p
124 --------------------------------------
125
126
127 phyloWithBranches_1 = setPhyloBranches (initLevel 1 Level_1) phyloWithPair_1_Childs
128
129
130 ------------------------------------------------------------------------
131 -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
132
133
134 -- | To process the weightedLogJaccard between two PhyloGroups fields
135 weightedLogJaccard :: Double -> PhyloGroupId -> Map (Int, Int) Double -> Map (Int, Int) Double -> (PhyloGroupId, Double)
136 weightedLogJaccard s id f1 f2
137 | null wUnion = (id,0)
138 | wUnion == wInter = (id,1)
139 | s == 0 = (id,(fromIntegral $ length wInter)/(fromIntegral $ length wUnion))
140 | s > 0 = (id,(sumInvLog wInter)/(sumInvLog wUnion))
141 | otherwise = (id,(sumLog wInter)/(sumLog wUnion))
142 where
143 --------------------------------------
144 wInter :: [Double]
145 wInter = elems $ intersectionWith (+) f1 f2
146 --------------------------------------
147 wUnion :: [Double]
148 wUnion = elems $ unionWith (+) f1 f2
149 --------------------------------------
150 sumInvLog :: [Double] -> Double
151 sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
152 --------------------------------------
153 sumLog :: [Double] -> Double
154 sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
155 --------------------------------------
156
157
158 -- | To apply the corresponding proximity function based on a given Proximity
159 getProximity :: Proximity -> Double -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
160 getProximity prox s g1 g2 = case prox of
161 WeightedLogJaccard -> weightedLogJaccard s (getGroupId g2) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1))
162 Other -> undefined
163 _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
164
165
166 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
167 getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
168 getNextPeriods to id l = case to of
169 Childs -> unNested id ((tail . snd) next)
170 Parents -> unNested id ((reverse . fst) next)
171 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
172 where
173 --------------------------------------
174 next :: ([PhyloPeriodId], [PhyloPeriodId])
175 next = splitAt idx l
176 --------------------------------------
177 idx :: Int
178 idx = case (List.elemIndex id l) of
179 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
180 Just i -> i
181 --------------------------------------
182 -- | To have an non-overlapping next period
183 unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
184 unNested x l
185 | null l = []
186 | nested (fst $ head l) x = unNested x (tail l)
187 | nested (snd $ head l) x = unNested x (tail l)
188 | otherwise = l
189 --------------------------------------
190 nested :: Date -> PhyloPeriodId -> Bool
191 nested d prd = d >= fst prd && d <= snd prd
192 --------------------------------------
193
194
195 -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
196 findBestCandidates :: PairTo -> Int -> Int -> Double -> Double -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
197 findBestCandidates to depth max thr s group p
198 | depth > max || null next = []
199 | (not . null) best = take 2 best
200 | otherwise = findBestCandidates to (depth + 1) max thr s group p
201 where
202 --------------------------------------
203 next :: [PhyloPeriodId]
204 next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
205 --------------------------------------
206 candidates :: [PhyloGroup]
207 candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
208 --------------------------------------
209 scores :: [(PhyloGroupId, Double)]
210 scores = map (\group' -> getProximity WeightedLogJaccard s group group') candidates
211 --------------------------------------
212 best :: [(PhyloGroupId, Double)]
213 best = reverse
214 $ sortOn snd
215 $ filter (\(id,score) -> score >= thr) scores
216 --------------------------------------
217
218
219 -- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
220 makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
221 makePair to group ids = case to of
222 Childs -> over (phylo_groupPeriodChilds) addPointers group
223 Parents -> over (phylo_groupPeriodParents) addPointers group
224 _ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
225 where
226 --------------------------------------
227 addPointers :: [Pointer] -> [Pointer]
228 addPointers l = nub $ (l ++ ids)
229 --------------------------------------
230
231
232 -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
233 pairGroupsToGroups :: PairTo -> Level -> Double -> Double -> Phylo -> Phylo
234 pairGroupsToGroups to lvl thr s p = alterPhyloGroups
235 (\groups ->
236 map (\group ->
237 if (getGroupLevel group) == (getLevelValue lvl)
238 then
239 let
240 --------------------------------------
241 candidates :: [(PhyloGroupId, Double)]
242 candidates = findBestCandidates to 1 5 thr s group p
243 --------------------------------------
244 in
245 makePair to group candidates
246 else
247 group ) groups) p
248
249
250 phyloWithPair_1_Childs :: Phylo
251 phyloWithPair_1_Childs = pairGroupsToGroups Childs (initLevel 1 Level_1) 0.01 0 phyloWithPair_1_Parents
252
253
254 phyloWithPair_1_Parents :: Phylo
255 phyloWithPair_1_Parents = pairGroupsToGroups Parents (initLevel 1 Level_1) 0.01 0 phyloLinked_0_1
256
257
258 ------------------------------------------------------------------------
259 -- | STEP 10 | -- Build the coocurency Matrix of the Phylo
260
261
262 -- | Are two PhyloGroups sharing at leats one Ngrams
263 shareNgrams :: PhyloGroup -> PhyloGroup -> Bool
264 shareNgrams g g' = (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g')
265
266
267 -- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
268 getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
269 getKeyPair (x,y) m = case findPair (x,y) m of
270 Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
271 Just i -> i
272 where
273 --------------------------------------
274 findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
275 findPair (x,y) m
276 | member (x,y) m = Just (x,y)
277 | member (y,x) m = Just (y,x)
278 | otherwise = Nothing
279 --------------------------------------
280
281
282 -- | To get all combinations of a list with no repetition and apply a function to the resulting list of pairs
283 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
284 listToCombi f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
285
286
287 -- | To transform the Fis into a coocurency Matrix in a Phylo
288 fisToCooc :: Map (Date, Date) Fis -> Phylo -> Map (Int, Int) Double
289 fisToCooc m p = map (/docs)
290 $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
291 $ concat
292 $ map (\x -> listToCombi (\x -> ngramsToIdx x p) $ (Set.toList . fst) x) fis
293 where
294 --------------------------------------
295 fis :: [(Clique,Support)]
296 fis = concat $ map (\x -> Map.toList x) (elems m)
297 --------------------------------------
298 fisNgrams :: [Ngrams]
299 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis
300 --------------------------------------
301 docs :: Double
302 docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis
303 --------------------------------------
304 cooc :: Map (Int, Int) (Double)
305 cooc = Map.fromList $ map (\x -> (x,0)) (listToCombi (\x -> ngramsToIdx x p) fisNgrams)
306 --------------------------------------
307
308
309 phyloCooc :: Map (Int, Int) Double
310 phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
311
312
313 ------------------------------------------------------------------------
314 -- | STEP 9 | -- Build level 1 of the Phylo
315
316
317 -- | To Cliques into Groups
318 cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) Fis -> Phylo -> PhyloGroup
319 cliqueToGroup period lvl idx label fis m p = PhyloGroup ((period, lvl), idx)
320 label
321 ngrams
322 (singleton "support" (fromIntegral $ snd fis))
323 cooc
324 [] [] [] []
325 where
326 --------------------------------------
327 ngrams :: [Int]
328 ngrams = sort $ map (\x -> ngramsToIdx x p)
329 $ Set.toList
330 $ fst fis
331 --------------------------------------
332 cooc :: Map (Int, Int) Double
333 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
334 $ fisToCooc (restrictKeys m $ Set.fromList [period]) p
335 --------------------------------------
336
337
338 -- | To transform Fis into PhyloLevels
339 fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo
340 fisToPhyloLevel m p = over (phylo_periods . traverse)
341 (\period ->
342 let periodId = _phylo_periodId period
343 fisList = zip [1..] (Map.toList (m ! periodId))
344 in over (phylo_periodLevels)
345 (\levels ->
346 let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
347 in levels ++ [PhyloLevel (periodId, 1) groups]
348 ) period ) p
349
350
351 phyloLinked_0_1 :: Phylo
352 phyloLinked_0_1 = alterLevelLinks lvl_0_1 phyloLinked_1_0
353
354
355 lvl_0_1 :: LevelLink
356 lvl_0_1 = initLevelLink (initLevel 0 Level_0) (initLevel 1 Level_1)
357
358
359 phyloLinked_1_0 :: Phylo
360 phyloLinked_1_0 = alterLevelLinks lvl_1_0 phyloWithGroups1
361
362
363 lvl_1_0 :: LevelLink
364 lvl_1_0 = initLevelLink (initLevel 1 Level_1) (initLevel 0 Level_0)
365
366
367 phyloWithGroups1 :: Phylo
368 phyloWithGroups1 = updatePhyloByLevel (initLevel 1 Level_1) phyloLinked_m1_0
369
370
371 ------------------------------------------------------------------------
372 -- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
373
374
375 -- | To Filter Fis by support
376 filterFisBySupport :: Bool -> Int -> Map (Date, Date) Fis -> Map (Date, Date) Fis
377 filterFisBySupport empty min m = case empty of
378 True -> Map.map (\fis -> filterMinorFis min fis) m
379 False -> Map.map (\fis -> filterMinorFisNonEmpty min fis) m
380
381
382 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
383 filterMinorFis :: Int -> Fis -> Fis
384 filterMinorFis min fis = Map.filter (\s -> s > min) fis
385
386
387 -- | To filter Fis with small Support but by keeping non empty Periods
388 filterMinorFisNonEmpty :: Int -> Fis -> Fis
389 filterMinorFisNonEmpty min fis = if (Map.null fis') && (not $ Map.null fis)
390 then filterMinorFisNonEmpty (min - 1) fis
391 else fis'
392 where
393 --------------------------------------
394 fis' :: Fis
395 fis' = filterMinorFis min fis
396 --------------------------------------
397
398
399 -- | To filter nested Fis
400 filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis
401 filterFisByNested = map (\fis -> restrictKeys fis
402 $ Set.fromList
403 $ filterNestedSets (head (keys fis)) (keys fis) []
404 )
405
406
407 -- | To transform a list of Documents into a Frequent Items Set
408 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) Fis
409 docsToFis docs = map (\d -> fisWithSizePolyMap
410 (Segment 1 20)
411 1
412 (map (words . text) d)) docs
413
414
415 phyloFisFiltered :: Map (Date, Date) Fis
416 phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
417
418
419 phyloFis :: Map (Date, Date) Fis
420 phyloFis = docsToFis phyloPeriods
421
422
423 ------------------------------------------------------------------------
424 -- | STEP 7 | -- Link level -1 to level 0
425
426
427 phyloLinked_m1_0 :: Phylo
428 phyloLinked_m1_0 = alterLevelLinks lvl_m1_0 phyloLinked_0_m1
429
430
431 lvl_m1_0 :: LevelLink
432 lvl_m1_0 = initLevelLink (initLevel (-1) Level_m1) (initLevel 0 Level_0)
433
434
435 ------------------------------------------------------------------------
436 -- | STEP 6 | -- Link level 0 to level -1
437
438
439 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
440 linkGroupToGroups :: LevelLink -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
441 linkGroupToGroups lvl current targets
442 | getLevelLinkValue From lvl < getLevelLinkValue To lvl = setLevelParents current
443 | getLevelLinkValue From lvl > getLevelLinkValue To lvl = setLevelChilds current
444 | otherwise = current
445 where
446 --------------------------------------
447 setLevelChilds :: PhyloGroup -> PhyloGroup
448 setLevelChilds = over (phylo_groupLevelChilds) addPointers
449 --------------------------------------
450 setLevelParents :: PhyloGroup -> PhyloGroup
451 setLevelParents = over (phylo_groupLevelParents) addPointers
452 --------------------------------------
453 addPointers :: [Pointer] -> [Pointer]
454 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
455 if shouldLink lvl (_phylo_groupNgrams current)
456 (_phylo_groupNgrams target )
457 then Just ((getGroupId target),1)
458 else Nothing) targets
459 --------------------------------------
460
461
462 -- | To set the LevelLinks between two lists of PhyloGroups
463 linkGroupsByLevel :: LevelLink -> Phylo -> [PhyloGroup] -> [PhyloGroup]
464 linkGroupsByLevel lvl p groups = map (\group ->
465 if getGroupLevel group == getLevelLinkValue From lvl
466 then linkGroupToGroups lvl group (getGroupsWithFilters (getLevelLinkValue To lvl) (getGroupPeriod group) p)
467 else group ) groups
468
469
470 -- | To set the LevelLink of all the PhyloGroups of a Phylo
471 alterLevelLinks :: LevelLink -> Phylo -> Phylo
472 alterLevelLinks lvl p = alterPhyloGroups (linkGroupsByLevel lvl p) p
473
474
475 phyloLinked_0_m1 :: Phylo
476 phyloLinked_0_m1 = alterLevelLinks lvl_0_m1 phyloWithGroups0
477
478
479 lvl_0_m1 :: LevelLink
480 lvl_0_m1 = initLevelLink (initLevel 0 Level_0) (initLevel (-1) Level_m1)
481
482
483 ------------------------------------------------------------------------
484 -- | STEP 5 | -- Build level 0 as a copy of level -1
485
486
487 -- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
488 clonePhyloLevel :: Int -> Phylo -> Phylo
489 clonePhyloLevel lvl p = alterPhyloLevels (\l -> addPhyloLevel
490 (setPhyloLevelId lvl $ head l)
491 l) p
492
493
494 phyloWithGroups0 :: Phylo
495 phyloWithGroups0 = updatePhyloByLevel (initLevel 0 Level_0) phyloWithGroupsm1
496
497
498 ------------------------------------------------------------------------
499 -- | STEP 4 | -- Build level -1
500
501
502 -- | To transform a list of Documents into a PhyloLevel
503 docsToPhyloLevel :: Int ->(Date, Date) -> [Document] -> Phylo -> PhyloLevel
504 docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
505 ((d, d'), lvl)
506 (map (\(f,s) -> initGroup [s] s f lvl d d' p)
507 $ zip [1..]
508 $ (nub . concat)
509 $ map (words . text) docs)
510
511
512 -- | To transform a Map of Periods and Documents into a list of PhyloPeriods
513 docsToPhyloPeriods :: Int -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
514 docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
515 $ Map.toList levels
516 where
517 --------------------------------------
518 levels :: Map (Date,Date) [PhyloLevel]
519 levels = mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
520 --------------------------------------
521
522
523 -- | To update a Phylo for a given Levels
524 updatePhyloByLevel :: Level -> Phylo -> Phylo
525 updatePhyloByLevel lvl p
526 = case getLevelLabel lvl of
527
528 Level_m1 -> appendPhyloPeriods (docsToPhyloPeriods (getLevelValue lvl) lvlData p) p
529 where
530 --------------------------------------
531 lvlData :: Map (Date,Date) [Document]
532 lvlData = phyloPeriods
533 --------------------------------------
534
535 Level_0 -> clonePhyloLevel (getLevelValue lvl) p
536
537 Level_1 -> fisToPhyloLevel lvlData p
538 where
539 --------------------------------------
540 lvlData :: Map (Date, Date) Fis
541 lvlData = phyloFisFiltered
542 --------------------------------------
543
544 _ -> panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
545
546
547 phyloWithGroupsm1 :: Phylo
548 phyloWithGroupsm1 = updatePhyloByLevel (initLevel (-1) Level_m1) phylo
549
550
551 ------------------------------------------------------------------------
552 -- | STEP 3 | -- Parse the Documents and group them by Periods
553
554
555 -- | To init a set of periods out of a given Grain and Step
556 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
557 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
558 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
559 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
560 where
561 --------------------------------------
562 hs = steps g s $ both f (head es, last es)
563 --------------------------------------
564 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
565 inPeriode f' h (start,end) =
566 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
567 --------------------------------------
568 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
569 steps s' o' (start,end) = map (\l -> (head l, last l))
570 $ chunkAlong s' o' [start .. end]
571 --------------------------------------
572
573
574 -- | To parse a list of Documents by filtering on a Vector of Ngrams
575 parseDocs :: PhyloNgrams -> [Document] -> [Document]
576 parseDocs l docs = map (\(Document d t) -> Document d (unwords
577 $ filter (\x -> Vector.elem x l)
578 $ monoTexts t)) docs
579
580
581 -- | To group a list of Documents by fixed periods
582 groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
583 groupDocsByPeriod g s docs p = docsToPeriods date g s
584 $ parseDocs (getPhyloNgrams p) docs
585
586
587 phyloPeriods :: Map (Date, Date) [Document]
588 phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
589
590
591 ------------------------------------------------------------------------
592 -- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
593
594
595 -- | To init a Phylomemy
596 initPhylo :: [Document] -> PhyloNgrams -> Phylo
597 initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] []
598
599
600 -- | To init a PhyloNgrams as a Vector of Ngrams
601 initNgrams :: [Ngrams] -> PhyloNgrams
602 initNgrams l = Vector.fromList $ map toLower l
603
604
605 phylo :: Phylo
606 phylo = initPhylo phyloDocs (initNgrams actants)
607
608
609 ------------------------------------------------------------------------
610 -- | STEP 1 | -- Get a list of Document
611
612
613 -- | To transform a corpus of texts into a structured list of Documents
614 corpusToDocs :: [(Date, Text)] -> [Document]
615 corpusToDocs l = map (\(d,t) -> Document d t) l
616
617
618 phyloDocs :: [Document]
619 phyloDocs = corpusToDocs corpus
620
621
622 ------------------------------------------------------------------------
623 -- | STEP 0 | -- Let's start with an example
624
625
626 actants :: [Ngrams]
627 actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
628 , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
629 , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
630
631
632 corpus :: [(Date, Text)]
633 corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]