]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Example.hs
refactoring
[gargantext.git] / src / Gargantext / Viz / Phylo / Example.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Example
3 Description : Phylomemy example based on history of Cleopatre.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -- | Cesar et Cleôpatre
11 -- Exemple de phylomemie
12 -- French without accents
13
14
15 TODO:
16 - split the functions : RAW -> Document -> Ngrams
17
18 -- reverse history: antechronologique
19 -- metrics support
20
21
22 -}
23
24 {-# LANGUAGE NoImplicitPrelude #-}
25 {-# LANGUAGE FlexibleContexts #-}
26 {-# LANGUAGE OverloadedStrings #-}
27
28 module Gargantext.Viz.Phylo.Example where
29
30 import Control.Lens hiding (makeLenses, both, Level)
31
32 import Data.Bool (Bool, not)
33 import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), (!!), nub, sortOn, reverse, splitAt, take, delete, init)
34 import Data.Map (Map, elems, member, adjust, singleton, empty, (!), keys, restrictKeys, mapWithKey, filterWithKey, mapKeys, intersectionWith, unionWith)
35 import Data.Semigroup (Semigroup)
36 import Data.Set (Set)
37 import Data.Text (Text, unwords, toLower, words)
38 import Data.Tuple (fst, snd)
39 import Data.Tuple.Extra
40 import Data.Vector (Vector, fromList, elemIndex)
41
42 import Gargantext.Prelude hiding (head)
43 import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
44 import Gargantext.Text.Terms.Mono (monoTexts)
45 import Gargantext.Viz.Phylo
46 import Gargantext.Viz.Phylo.Tools
47
48 import qualified Data.Bool as Bool
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Maybe as Maybe
52 import qualified Data.Set as Set
53 import qualified Data.Tuple as Tuple
54 import qualified Data.Vector as Vector
55
56
57 ------------------------------------------------------------------------
58 -- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
59
60
61 ------------------------------------------------------------------------
62 -- | STEP 13 | -- Cluster the Fis
63
64
65 -- | To do : ajouter de nouveaux clusters / proxi
66 -- gérer les cooc à level 2 et +, idem pour les quality
67 -- réfléchir aux formats de sortie
68
69
70 -- | To apply a Clustering method to a PhyloGraph
71 graphToClusters :: (Clustering,[Double]) -> PhyloGraph -> [[PhyloGroup]]
72 graphToClusters (clust,param) (nodes,edges) = case clust of
73 Louvain -> undefined
74 RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
75
76
77 -- | To transform a Phylo into Clusters of PhyloGroups at a given level
78 phyloToClusters :: Level -> (Proximity,[Double]) -> (Clustering,[Double]) -> Phylo -> Map (Date,Date) [[PhyloGroup]]
79 phyloToClusters lvl (prox,param) (clus,param') p = Map.fromList
80 $ zip (getPhyloPeriods p)
81 (map (\prd -> let graph = groupsToGraph (prox,param) (getGroupsWithFilters lvl prd p) p
82 in if null (fst graph)
83 then []
84 else graphToClusters (clus,param') graph)
85 (getPhyloPeriods p))
86
87
88 -- | To transform a Cluster into a Phylogroup
89 clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [PhyloGroup] -> PhyloGroup
90 clusterToGroup prd lvl idx lbl groups = PhyloGroup ((prd, lvl), idx)
91 lbl
92 ((sort . nub . concat) $ map getGroupNgrams groups)
93 empty
94 empty
95 [] [] []
96 (map (\g -> (getGroupId g, 1)) groups)
97
98 -- | To transform a list of Clusters into a new Phylolevel
99 clustersToPhyloLevel :: Level -> Map (Date,Date) [[PhyloGroup]] -> Phylo -> Phylo
100 clustersToPhyloLevel lvl m p = over (phylo_periods . traverse)
101 (\period ->
102 let periodId = _phylo_periodId period
103 clusters = zip [1..] (m ! periodId)
104 in over (phylo_periodLevels)
105 (\levels ->
106 let groups = map (\cluster -> clusterToGroup periodId lvl (fst cluster) "" (snd cluster)) clusters
107 in levels ++ [PhyloLevel (periodId, lvl) groups]
108 ) period) p
109
110
111 phyloWithGroups2 = clustersToPhyloLevel 2 (phyloToClusters 1 (WeightedLogJaccard,[0]) (RelatedComponents, []) phyloWithBranches_1) phyloWithBranches_1
112
113 ------------------------------------------------------------------------
114 -- | STEP 12 | -- Find the Branches
115
116
117 -- | To apply the related components method to a PhyloGraph
118 -- curr = the current PhyloGroup
119 -- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
120 -- next = the next PhyloGroups to be added in the cluster
121 -- memo = the memory of the allready created clusters
122 relatedComp :: Int -> PhyloGroup -> PhyloGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
123 relatedComp idx curr (nodes,edges) next memo
124 | null nodes' && null next' = memo'
125 | (not . null) next' = relatedComp idx (head next') (nodes',edges) (tail next') memo'
126 | otherwise = relatedComp (idx + 1) (head nodes') (tail nodes',edges) [] memo'
127 where
128 --------------------------------------
129 memo' :: [[PhyloGroup]]
130 memo'
131 | null memo = [[curr]]
132 | idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
133 | otherwise = memo ++ [[curr]]
134 --------------------------------------
135 next' :: [PhyloGroup]
136 next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
137 --------------------------------------
138 nodes' :: [PhyloGroup]
139 nodes' = filter (\x -> not $ elem x next') nodes
140 --------------------------------------
141
142
143 -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
144 graphToBranches :: Level -> PhyloGraph -> Phylo -> [PhyloBranch]
145 graphToBranches lvl (nodes,edges) p = map (\(idx,c) -> PhyloBranch (lvl,idx) "" (map getGroupId c)) $ zip [0..] clusters
146 where
147 --------------------------------------
148 clusters :: [[PhyloGroup]]
149 clusters = relatedComp 0 (head nodes) (tail nodes,edges) [] []
150 --------------------------------------
151
152
153 -- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
154 groupsToGraph :: (Proximity,[Double]) -> [PhyloGroup] -> Phylo -> PhyloGraph
155 groupsToGraph (prox,param) groups p = (groups,edges)
156 where
157 edges :: PhyloEdges
158 edges = case prox of
159 FromPairs -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
160 ++
161 (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
162 WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard
163 (param !! 0) (getGroupCooc x)
164 (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
165 _ -> undefined
166
167
168 -- | To set all the PhyloBranches for a given Level in a Phylo
169 setPhyloBranches :: Level -> Phylo -> Phylo
170 setPhyloBranches lvl p = alterPhyloBranches
171 (\branches -> branches
172 ++
173 (graphToBranches lvl (groupsToGraph (FromPairs,[]) (getGroupsWithLevel lvl p)p)p))p
174
175
176 phyloWithBranches_1 = setPhyloBranches 1 phyloWithPair_1_Childs
177
178
179 ------------------------------------------------------------------------
180 -- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
181
182
183 -- | To process the weightedLogJaccard between two PhyloGroups fields
184 weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
185 weightedLogJaccard s f1 f2
186 | null wUnion = 0
187 | wUnion == wInter = 1
188 | s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
189 | s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
190 | otherwise = (sumLog wInter)/(sumLog wUnion)
191 where
192 --------------------------------------
193 wInter :: [Double]
194 wInter = elems $ intersectionWith (+) f1 f2
195 --------------------------------------
196 wUnion :: [Double]
197 wUnion = elems $ unionWith (+) f1 f2
198 --------------------------------------
199 sumInvLog :: [Double] -> Double
200 sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
201 --------------------------------------
202 sumLog :: [Double] -> Double
203 sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
204 --------------------------------------
205
206
207 -- | To apply the corresponding proximity function based on a given Proximity
208 getProximity :: (Proximity,[Double]) -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
209 getProximity (prox,param) g1 g2 = case prox of
210 WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (param !! 0) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
211 _ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
212
213
214 -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
215 getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
216 getNextPeriods to id l = case to of
217 Childs -> unNested id ((tail . snd) next)
218 Parents -> unNested id ((reverse . fst) next)
219 _ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
220 where
221 --------------------------------------
222 next :: ([PhyloPeriodId], [PhyloPeriodId])
223 next = splitAt idx l
224 --------------------------------------
225 idx :: Int
226 idx = case (List.elemIndex id l) of
227 Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
228 Just i -> i
229 --------------------------------------
230 -- | To have an non-overlapping next period
231 unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
232 unNested x l
233 | null l = []
234 | nested (fst $ head l) x = unNested x (tail l)
235 | nested (snd $ head l) x = unNested x (tail l)
236 | otherwise = l
237 --------------------------------------
238 nested :: Date -> PhyloPeriodId -> Bool
239 nested d prd = d >= fst prd && d <= snd prd
240 --------------------------------------
241
242
243 -- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
244 findBestCandidates :: PairTo -> Int -> Int -> Double -> (Proximity,[Double]) -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
245 findBestCandidates to depth max thr (prox,param) group p
246 | depth > max || null next = []
247 | (not . null) best = take 2 best
248 | otherwise = findBestCandidates to (depth + 1) max thr (prox,param) group p
249 where
250 --------------------------------------
251 next :: [PhyloPeriodId]
252 next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
253 --------------------------------------
254 candidates :: [PhyloGroup]
255 candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
256 --------------------------------------
257 scores :: [(PhyloGroupId, Double)]
258 scores = map (\group' -> getProximity (prox,param) group group') candidates
259 --------------------------------------
260 best :: [(PhyloGroupId, Double)]
261 best = reverse
262 $ sortOn snd
263 $ filter (\(id,score) -> score >= thr) scores
264 --------------------------------------
265
266
267 -- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
268 makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
269 makePair to group ids = case to of
270 Childs -> over (phylo_groupPeriodChilds) addPointers group
271 Parents -> over (phylo_groupPeriodParents) addPointers group
272 _ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
273 where
274 --------------------------------------
275 addPointers :: [Pointer] -> [Pointer]
276 addPointers l = nub $ (l ++ ids)
277 --------------------------------------
278
279
280 -- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
281 pairGroupsToGroups :: PairTo -> Level -> Double -> (Proximity,[Double]) -> Phylo -> Phylo
282 pairGroupsToGroups to lvl thr (prox,param) p = alterPhyloGroups
283 (\groups ->
284 map (\group ->
285 if (getGroupLevel group) == lvl
286 then
287 let
288 --------------------------------------
289 candidates :: [(PhyloGroupId, Double)]
290 candidates = findBestCandidates to 1 5 thr (prox,param) group p
291 --------------------------------------
292 in
293 makePair to group candidates
294 else
295 group ) groups) p
296
297
298 phyloWithPair_1_Childs :: Phylo
299 phyloWithPair_1_Childs = pairGroupsToGroups Childs 1 0.01 (WeightedLogJaccard,[0]) phyloWithPair_1_Parents
300
301
302 phyloWithPair_1_Parents :: Phylo
303 phyloWithPair_1_Parents = pairGroupsToGroups Parents 1 0.01 (WeightedLogJaccard,[0]) phyloLinked_0_1
304
305
306 ------------------------------------------------------------------------
307 -- | STEP 10 | -- Build the coocurency Matrix of the Phylo
308
309
310 -- | To transform the Fis into a coocurency Matrix in a Phylo
311 fisToCooc :: Map (Date, Date) [Fis] -> Phylo -> Map (Int, Int) Double
312 fisToCooc m p = map (/docs)
313 $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
314 $ concat
315 $ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x)
316 $ (concat . elems) m
317 where
318 --------------------------------------
319 fisNgrams :: [Ngrams]
320 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] $ (concat . elems) m
321 --------------------------------------
322 docs :: Double
323 docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 $ (concat . elems) m
324 --------------------------------------
325 cooc :: Map (Int, Int) (Double)
326 cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
327 --------------------------------------
328
329
330 phyloCooc :: Map (Int, Int) Double
331 phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
332
333
334 ------------------------------------------------------------------------
335 -- | STEP 9 | -- Build level 1 of the Phylo
336
337
338 -- | To Cliques into Groups
339 cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) [Fis] -> Phylo -> PhyloGroup
340 cliqueToGroup period lvl idx label fis m p =
341 PhyloGroup ((period, lvl), idx) label ngrams (singleton "support" (fromIntegral $ snd fis)) cooc [] [] [] []
342 where
343 --------------------------------------
344 ngrams :: [Int]
345 ngrams = sort $ map (\x -> ngramsToIdx x p)
346 $ Set.toList
347 $ fst fis
348 --------------------------------------
349 cooc :: Map (Int, Int) Double
350 cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
351 $ fisToCooc (restrictKeys m $ Set.fromList [period]) p
352 --------------------------------------
353
354
355 -- | To transform Fis into PhyloLevels
356 fisToPhyloLevel :: Map (Date, Date) [Fis] -> Phylo -> Phylo
357 fisToPhyloLevel m p = over (phylo_periods . traverse)
358 (\period ->
359 let periodId = _phylo_periodId period
360 fisList = zip [1..] (m ! periodId)
361 in over (phylo_periodLevels)
362 (\phyloLevels ->
363 let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
364 in phyloLevels ++ [PhyloLevel (periodId, 1) groups]
365 ) period ) p
366
367
368 phyloLinked_0_1 :: Phylo
369 phyloLinked_0_1 = alterLevelLinks (0,1) phyloLinked_1_0
370
371
372 phyloLinked_1_0 :: Phylo
373 phyloLinked_1_0 = alterLevelLinks (1,0) phyloWithGroups1
374
375
376 phyloWithGroups1 :: Phylo
377 phyloWithGroups1 = updatePhyloByLevel 1 phyloLinked_m1_0
378
379
380 ------------------------------------------------------------------------
381 -- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
382
383
384 -- | To Filter Fis by support
385 filterFisBySupport :: Bool -> Int -> Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
386 filterFisBySupport empty min m = case empty of
387 True -> Map.map (\l -> filterMinorFis min l) m
388 False -> Map.map (\l -> keepFilled (filterMinorFis) min l) m
389
390
391 -- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
392 filterMinorFis :: Int -> [Fis] -> [Fis]
393 filterMinorFis min l = filter (\fis -> snd fis > min) l
394
395
396 -- | To filter nested Fis
397 filterFisByNested :: Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
398 filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map fst l) (map fst l) []
399 in filter (\fis -> elem (fst fis) cliqueMax) l)
400
401
402 -- | To transform a list of Documents into a Frequent Items Set
403 docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [Fis]
404 docsToFis docs = map (\d -> Map.toList
405 $ fisWithSizePolyMap (Segment 1 20) 1 (map (words . text) d)) docs
406
407
408 phyloFisFiltered :: Map (Date, Date) [Fis]
409 phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
410
411
412 phyloFis :: Map (Date, Date) [Fis]
413 phyloFis = docsToFis phyloPeriods
414
415
416 ------------------------------------------------------------------------
417 -- | STEP 7 | -- Link level -1 to level 0
418
419
420 phyloLinked_m1_0 :: Phylo
421 phyloLinked_m1_0 = alterLevelLinks ((-1),0) phyloLinked_0_m1
422
423
424 ------------------------------------------------------------------------
425 -- | STEP 6 | -- Link level 0 to level -1
426
427
428 -- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
429 linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
430 linkGroupToGroups (lvl,lvl') current targets
431 | lvl < lvl' = setLevelParents current
432 | lvl > lvl' = setLevelChilds current
433 | otherwise = current
434 where
435 --------------------------------------
436 setLevelChilds :: PhyloGroup -> PhyloGroup
437 setLevelChilds = over (phylo_groupLevelChilds) addPointers
438 --------------------------------------
439 setLevelParents :: PhyloGroup -> PhyloGroup
440 setLevelParents = over (phylo_groupLevelParents) addPointers
441 --------------------------------------
442 addPointers :: [Pointer] -> [Pointer]
443 addPointers lp = lp ++ Maybe.mapMaybe (\target ->
444 if shouldLink (lvl,lvl')
445 (_phylo_groupNgrams current)
446 (_phylo_groupNgrams target )
447 then Just ((getGroupId target),1)
448 else Nothing) targets
449 --------------------------------------
450
451
452 -- | To set the LevelLinks between two lists of PhyloGroups
453 linkGroupsByLevel :: (Level,Level) -> Phylo -> [PhyloGroup] -> [PhyloGroup]
454 linkGroupsByLevel (lvl,lvl') p groups = map (\group ->
455 if getGroupLevel group == lvl
456 then linkGroupToGroups (lvl,lvl') group (getGroupsWithFilters lvl' (getGroupPeriod group) p)
457 else group) groups
458
459
460 -- | To set the LevelLink of all the PhyloGroups of a Phylo
461 alterLevelLinks :: (Level,Level) -> Phylo -> Phylo
462 alterLevelLinks (lvl,lvl') p = alterPhyloGroups (linkGroupsByLevel (lvl,lvl') p) p
463
464
465 phyloLinked_0_m1 :: Phylo
466 phyloLinked_0_m1 = alterLevelLinks (0,(-1)) phyloWithGroups0
467
468
469 ------------------------------------------------------------------------
470 -- | STEP 5 | -- Build level 0 as a copy of level -1
471
472
473 -- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
474 clonePhyloLevel :: Level -> Phylo -> Phylo
475 clonePhyloLevel lvl p = alterPhyloLevels (\l -> addPhyloLevel (setPhyloLevelId lvl $ head l) l) p
476
477
478 phyloWithGroups0 :: Phylo
479 phyloWithGroups0 = updatePhyloByLevel 0 phyloWithGroupsm1
480
481
482 ------------------------------------------------------------------------
483 -- | STEP 4 | -- Build level -1
484
485
486 -- | To transform a list of Documents into a PhyloLevel
487 docsToPhyloLevel :: Level -> (Date, Date) -> [Document] -> Phylo -> PhyloLevel
488 docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
489 ((d, d'), lvl)
490 (map (\(f,s) -> initGroup [s] s f lvl d d' p)
491 $ zip [1..]
492 $ (nub . concat)
493 $ map (words . text) docs)
494
495
496 -- | To transform a Map of Periods and Documents into a list of PhyloPeriods
497 docsToPhyloPeriods :: Level -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
498 docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
499 $ Map.toList
500 $ mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
501
502
503 -- | To update a Phylo for a given Levels
504 updatePhyloByLevel :: Level -> Phylo -> Phylo
505 updatePhyloByLevel lvl p
506 | lvl < 0 = appendPhyloPeriods (docsToPhyloPeriods lvl phyloPeriods p) p
507 | lvl == 0 = clonePhyloLevel lvl p
508 | lvl == 1 = fisToPhyloLevel phyloFisFiltered p
509 | lvl > 1 = undefined
510 | otherwise = panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
511
512
513 phyloWithGroupsm1 :: Phylo
514 phyloWithGroupsm1 = updatePhyloByLevel (-1) phylo
515
516
517 ------------------------------------------------------------------------
518 -- | STEP 3 | -- Parse the Documents and group them by Periods
519
520
521 -- | To init a set of periods out of a given Grain and Step
522 docsToPeriods :: (Ord date, Enum date) => (doc -> date)
523 -> Grain -> Step -> [doc] -> Map (date, date) [doc]
524 docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
525 docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
526 where
527 --------------------------------------
528 hs = steps g s $ both f (head es, last es)
529 --------------------------------------
530 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
531 inPeriode f' h (start,end) =
532 fst $ List.partition (\d -> f' d >= start && f' d <= end) h
533 --------------------------------------
534 steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
535 steps s' o' (start,end) = map (\l -> (head l, last l))
536 $ chunkAlong s' o' [start .. end]
537 --------------------------------------
538
539
540 -- | To parse a list of Documents by filtering on a Vector of Ngrams
541 parseDocs :: PhyloNgrams -> [Document] -> [Document]
542 parseDocs l docs = map (\(Document d t)
543 -> Document d ( unwords
544 $ filter (\x -> Vector.elem x l)
545 $ monoTexts t)) docs
546
547
548 -- | To group a list of Documents by fixed periods
549 groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
550 groupDocsByPeriod g s docs p = docsToPeriods date g s $ parseDocs (getPhyloNgrams p) docs
551
552
553 phyloPeriods :: Map (Date, Date) [Document]
554 phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
555
556
557 ------------------------------------------------------------------------
558 -- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
559
560
561 phylo :: Phylo
562 phylo = initPhylo phyloDocs (initNgrams actants)
563
564
565 ------------------------------------------------------------------------
566 -- | STEP 1 | -- Get a list of Document
567
568
569 -- | To transform a corpus of texts into a structured list of Documents
570 corpusToDocs :: [(Date, Text)] -> [Document]
571 corpusToDocs l = map (\(d,t) -> Document d t) l
572
573
574 phyloDocs :: [Document]
575 phyloDocs = corpusToDocs corpus
576
577
578 ------------------------------------------------------------------------
579 -- | STEP 0 | -- Let's start with an example
580
581
582 actants :: [Ngrams]
583 actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
584 , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
585 , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
586
587
588 corpus :: [(Date, Text)]
589 corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]