]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
[FIX] Phylo
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloMaker.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloMaker
3 Description : Maker engine for rebuilding a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11
12 module Gargantext.Core.Viz.Phylo.PhyloMaker where
13
14
15 import Control.DeepSeq (NFData)
16 import Control.Lens hiding (Level)
17 import Control.Parallel.Strategies (parList, rdeepseq, using)
18 import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
19 import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert)
20 import Data.Set (Set)
21 import Data.Text (Text)
22 import Data.Vector (Vector)
23 import Debug.Trace (trace)
24 import Prelude (floor)
25
26 import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
27 import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
28 import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
29 import Gargantext.Core.Viz.Phylo
30 import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
31 import Gargantext.Core.Viz.Phylo.PhyloTools
32 import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
33 import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
34 import Gargantext.Prelude
35
36 import qualified Data.Set as Set
37 import qualified Data.Map as Map
38 import qualified Data.List as List
39 import qualified Data.Vector as Vector
40
41 ------------------
42 -- | To Phylo | --
43 ------------------
44
45 {-
46 -- TODO AD
47 data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
48 | PhyloN { _phylo'_flatPhylo :: Phylo}
49
50
51 toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
52 toPhylo' (PhyloN phylo) = toPhylo'
53 toPhylo' (PhyloBase phylo) = toPhylo
54 -}
55
56 -- TODO an adaptative synchronic clustering with a slider
57
58 toPhylo :: Phylo -> Phylo
59 toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
60 if (phyloScale $ getConfig phylowithoutLink) > 1
61 then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
62 else phyloAncestors
63 where
64 --------------------------------------
65 phyloAncestors :: Phylo
66 phyloAncestors =
67 if (findAncestors $ getConfig phylowithoutLink)
68 then toHorizon phyloWithLinks
69 else phyloWithLinks
70 --------------------------------------
71 phyloWithLinks :: Phylo
72 phyloWithLinks = temporalMatching (getLadder phylowithoutLink) phylowithoutLink
73 --------------------------------------
74
75
76 -----------------------------
77 -- | Create a flat Phylo | --
78 -----------------------------
79
80
81 {-
82 -- create a square ladder
83 -}
84 squareLadder :: [Double] -> [Double]
85 squareLadder ladder = List.map (\x -> x * x) ladder
86
87
88 {-
89 -- create an adaptative 'sea elevation' ladder
90 -}
91 adaptSeaLadder :: Double -> Set Double -> Set Double -> [Double]
92 adaptSeaLadder curr similarities ladder =
93 if curr <= 0 || Set.null similarities
94 then Set.toList ladder
95 else
96 let idx = ((Set.size similarities) `div` (floor curr)) - 1
97 thr = Set.elemAt idx similarities
98 -- we use a sliding methods 1/10, then 1/9, then ... 1/2
99 in adaptSeaLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
100
101
102 {-
103 -- create a constante 'sea elevation' ladder
104 -}
105 constSeaLadder :: Double -> Double -> Set Double -> [Double]
106 constSeaLadder curr step ladder =
107 if curr > 1
108 then Set.toList ladder
109 else constSeaLadder (curr + step) step (Set.insert curr ladder)
110
111
112
113 {-
114 -- create an evolving 'sea elevation' ladder based on estimated & local quality maxima
115 -}
116 evolvSeaLadder :: Double -> Double -> Map Int Double -> Set Double -> [((PhyloGroup,PhyloGroup),Double)] -> [Double]
117 evolvSeaLadder nbFdt lambda freq similarities graph = map snd
118 $ filter fst
119 $ zip maxima (map fst qua')
120 -- 3) find the corresponding measures of similarity and create the ladder
121 where
122 --------
123 -- 2) find the local maxima in the quality distribution
124 maxima :: [Bool]
125 maxima = [snd (List.head qua') > snd (List.head $ List.tail qua')] ++ (findMaxima qua') ++ [snd (List.head $ reverse qua') > snd (List.head $ List.tail $ reverse qua')]
126 --------
127 -- 1.2)
128 qua' :: [(Double,Double)]
129 qua' = foldl (\acc (s,q) ->
130 if length acc == 0
131 then [(s,q)]
132 else if (snd (List.last acc)) == q
133 then acc
134 else acc ++ [(s,q)]
135 ) [] $ zip (Set.toList similarities) qua
136 --------
137 -- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
138 qua :: [Double]
139 qua = map (\thr ->
140 let edges = filter (\edge -> snd edge >= thr) graph
141 nodes = nub $ concat $ map (\((n,n'),_) -> [n,n']) edges
142 branches = toRelatedComponents nodes edges
143 in toPhyloQuality nbFdt lambda freq branches
144 ) $ (Set.toList similarities)
145
146
147 {-
148 -- find a similarity ladder regarding the "sea elevation" strategy
149 -}
150 findSeaLadder :: Phylo -> Phylo
151 findSeaLadder phylo = case getSeaElevation phylo of
152 Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty)
153 Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty)
154 Evolving _ -> let ladder = evolvSeaLadder
155 (fromIntegral $ Vector.length $ getRoots phylo)
156 (getLevel phylo)
157 (getRootsFreq phylo)
158 similarities simGraph
159 in phylo & phylo_seaLadder .~ (if length ladder > 0
160 then ladder
161 -- if we don't find any local maxima with the evolving strategy
162 else constSeaLadder 0.1 0.1 Set.empty)
163 where
164 --------
165 -- 2) extract the values of the kinship links
166 similarities :: Set Double
167 similarities = Set.fromList $ sort $ map snd simGraph
168 --------
169 -- 1) we process an initial calculation of the kinship links
170 -- this initial calculation is used to estimate the real sea ladder
171 simGraph :: [((PhyloGroup,PhyloGroup),Double)]
172 simGraph = foldl' (\acc period ->
173 -- 1.1) process period by period
174 let sources = getGroupsFromScalePeriods 1 [period] phylo
175 next = getNextPeriods ToParents 3 period (keys $ phylo ^. phylo_periods)
176 targets = getGroupsFromScalePeriods 1 next phylo
177 docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
178 diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
179 -- 1.2) compute the kinship similarities between pairs of source & target in parallel
180 pairs = map (\source ->
181 let candidates = filter (\target -> (> 2) $ length
182 $ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
183 in map (\target ->
184 let nbDocs = (sum . elems)
185 $ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
186 diago = reduceDiagos
187 $ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
188 in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target))
189 ) candidates
190 ) sources
191 pairs' = pairs `using` parList rdeepseq
192 in acc ++ (concat pairs')
193 ) [] $ keys $ phylo ^. phylo_periods
194
195 appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
196 appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to scale " <> show (lvl) <> "\n")
197 $ over ( phylo_periods
198 . traverse
199 . phylo_periodScales
200 . traverse)
201 (\phyloLvl -> if lvl == (phyloLvl ^. phylo_scaleScale)
202 then
203 let pId = phyloLvl ^. phylo_scalePeriod
204 pId' = phyloLvl ^. phylo_scalePeriodStr
205 phyloCUnit = m ! pId
206 in phyloLvl
207 & phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
208 groups ++ [ (((pId,lvl),length groups)
209 , f obj pId pId' lvl (length groups)
210 -- select the cooc of the periods
211 (elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])
212 -- select and merge the roots count of the periods
213 (foldl (\acc count -> unionWith (+) acc count) empty
214 $ elems $ restrictKeys (getRootsCountByDate phylo) $ periodsToYears [pId]))
215 ] ) [] phyloCUnit)
216 else
217 phyloLvl )
218 phylo
219
220
221 clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup
222 clusterToGroup fis pId pId' lvl idx coocs rootsCount = PhyloGroup pId pId' lvl idx ""
223 (fis ^. clustering_support )
224 (fis ^. clustering_visWeighting)
225 (fis ^. clustering_visFiltering)
226 (fis ^. clustering_roots)
227 (ngramsToCooc (fis ^. clustering_roots) coocs)
228 (ngramsToDensity (fis ^. clustering_roots) coocs rootsCount)
229 (1,[0]) -- branchid (lvl,[path in the branching tree])
230 (fromList [("breaks",[0]),("seaLevels",[0])])
231 rootsCount [] [] [] [] [] [] []
232
233
234 -----------------------
235 -- | To Phylo Step | --
236 -----------------------
237
238
239 indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
240 indexDates' m = map (\docs ->
241 let ds = map (\d -> date' d) docs
242 f = if (null ds)
243 then ""
244 else toFstDate ds
245 l = if (null ds)
246 then ""
247 else toLstDate ds
248 in (f,l)) m
249
250
251 -- create a map of roots and group ids
252 joinRoots :: Phylo -> Phylo
253 joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap phylo
254 where
255 --------------------------------------
256 rootsMap :: Map Int [PhyloGroupId]
257 rootsMap = fromListWith (++)
258 $ concat -- flatten
259 $ map (\g ->
260 map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
261 $ getGroupsFromScale 1 phylo
262
263
264 maybeDefaultParams :: Phylo -> Phylo
265 maybeDefaultParams phylo = if (defaultMode (getConfig phylo))
266 then findDefaultLevel phylo
267 else phylo
268
269
270 -- To build the first phylo step from docs and terms
271 -- QL: backend entre phyloBase et Clustering
272 toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo
273 toPhyloWithoutLink docs conf = joinRoots
274 $ findSeaLadder
275 $ maybeDefaultParams
276 $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
277 where
278 --------------------------------------
279 seriesOfClustering :: Map (Date,Date) [Clustering]
280 seriesOfClustering = toSeriesOfClustering phyloBase docs'
281 --------------------------------------
282 docs' :: Map (Date,Date) [Document]
283 -- QL: Time Consuming here
284 docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
285 --------------------------------------
286 phyloBase :: Phylo
287 phyloBase = initPhylo docs conf
288 --------------------------------------
289
290 ---------------------------
291 -- | Frequent Item Set | --
292 ---------------------------
293
294
295 -- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
296 filterClique :: Bool -> Int -> (Int -> [Clustering] -> [Clustering]) -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
297 filterClique keep thr f m = case keep of
298 False -> map (\l -> f thr l) m
299 True -> map (\l -> keepFilled (f) thr l) m
300
301
302 -- To filter Fis with small Support
303 filterCliqueBySupport :: Int -> [Clustering] -> [Clustering]
304 filterCliqueBySupport thr l = filter (\clq -> (clq ^. clustering_support ) >= thr) l
305
306
307 -- To filter Fis with small Clique size
308 filterCliqueBySize :: Int -> [Clustering] -> [Clustering]
309 filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= thr) l
310
311
312 -- To filter nested Fis
313 filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
314 filterCliqueByNested m =
315 let clq = map (\l ->
316 foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
317 then mem
318 else
319 let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
320 in fMax ++ [f] ) [] l)
321 $ elems m
322 clq' = clq `using` parList rdeepseq
323 in fromList $ zip (keys m) clq'
324
325
326 -- | To transform a time map of docs into a time map of Fis with some filters
327 toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
328 toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
329 Fis s s' -> -- traceFis "Filtered Fis"
330 filterCliqueByNested
331 {- \$ traceFis "Filtered by clique size" -}
332 $ filterClique True s' (filterCliqueBySize)
333 {- \$ traceFis "Filtered by support" -}
334 $ filterClique True s (filterCliqueBySupport)
335 {- \$ traceFis "Unfiltered Fis" -}
336 seriesOfClustering
337 MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
338 seriesOfClustering
339 where
340 --------------------------------------
341 seriesOfClustering :: Map (Date,Date) [Clustering]
342 seriesOfClustering = case (clique $ getConfig phylo) of
343 Fis _ _ ->
344 let fis = map (\(prd,docs) ->
345 case (corpusParser $ getConfig phylo) of
346 Csv' _ -> let lst = toList
347 $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
348 in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
349 _ -> let lst = toList
350 $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
351 in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
352 )
353 $ toList phyloDocs
354 fis' = fis `using` parList rdeepseq
355 in fromList fis'
356 MaxClique _ thr filterType ->
357 let mcl = map (\(prd,docs) ->
358 let cooc = map round
359 $ foldl sumCooc empty
360 $ map listToMatrix
361 $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
362 in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
363 $ toList phyloDocs
364 mcl' = mcl `using` parList rdeepseq
365 in fromList mcl'
366 --------------------------------------
367
368 -- dev viz graph maxClique getMaxClique
369
370
371 --------------------
372 -- | Coocurency | --
373 --------------------
374
375
376 -- To transform the docs into a time map of coocurency matrix
377 docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
378 docsToTimeScaleCooc docs fdt =
379 let mCooc = fromListWith sumCooc
380 $ map (\(_d,l) -> (_d, listToMatrix l))
381 $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
382 mCooc' = fromList
383 $ map (\t -> (t,empty))
384 $ toTimeScale (map date docs) 1
385 in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
386 $ unionWith sumCooc mCooc mCooc'
387
388
389 -----------------------
390 -- | to Phylo Base | --
391 -----------------------
392
393 -- TODO anoe
394 groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
395 groupDocsByPeriodRec f prds docs acc =
396 if ((null prds) || (null docs))
397 then acc
398 else
399 let prd = head' "groupBy" prds
400 docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
401 in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
402
403
404 -- To group a list of Documents by fixed periods
405 groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
406 groupDocsByPeriod' f pds docs =
407 let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
408 periods = map (inPeriode f docs') pds
409 periods' = periods `using` parList rdeepseq
410 in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n")
411 $ fromList $ zip pds periods'
412 where
413 --------------------------------------
414 inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
415 inPeriode f' h (start,end) =
416 concat $ fst $ partition (\d -> f' (head' "inPeriode" d) >= start && f' (head' "inPeriode" d) <= end) h
417
418
419
420 -- To group a list of Documents by fixed periods
421 groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
422 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
423 groupDocsByPeriod f pds es =
424 let periods = map (inPeriode f es) pds
425 periods' = periods `using` parList rdeepseq
426
427 in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
428 $ fromList $ zip pds periods'
429 where
430 --------------------------------------
431 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
432 inPeriode f' h (start,end) =
433 fst $ partition (\d -> f' d >= start && f' d <= end) h
434 --------------------------------------
435
436
437 docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
438 docsToTermFreq docs fdt =
439 let nbDocs = fromIntegral $ length docs
440 freqs = map (/(nbDocs))
441 $ fromList
442 $ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
443 $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
444 sumFreqs = sum $ elems freqs
445 in map (/sumFreqs) freqs
446
447
448 docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double
449 docsToTermCount docs roots = fromList
450 $ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst))
451 $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) roots) docs
452
453
454
455 docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double))
456 docsToTimeTermCount docs roots =
457 let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst))
458 $ group $ sort l)
459 $ fromListWith (++)
460 $ map (\d -> (date d, nub $ ngramsToIdx (text d) roots)) docs
461 time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1
462 in unionWith (Map.union) time docs'
463
464
465 docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
466 docsToLastTermFreq n docs fdt =
467 let last = take n $ reverse $ sort $ map date docs
468 nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
469 freqs = map (/(nbDocs))
470 $ fromList
471 $ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
472 $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
473 sumFreqs = sum $ elems freqs
474 in map (/sumFreqs) freqs
475
476
477 -- To count the number of docs by unit of time
478 docsToTimeScaleNb :: [Document] -> Map Date Double
479 docsToTimeScaleNb docs =
480 let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
481 time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
482 in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
483 $ unionWith (+) time docs'
484
485
486 initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
487 initPhyloScales lvlMax pId =
488 fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
489
490
491
492 setDefault :: PhyloConfig -> TimeUnit -> Int -> PhyloConfig
493 setDefault conf timeScale nbDocs = defaultConfig
494 { corpusPath = (corpusPath conf)
495 , listPath = (listPath conf)
496 , outputPath = (outputPath conf)
497 , corpusParser = (corpusParser conf)
498 , listParser = (listParser conf)
499 , phyloName = (phyloName conf)
500 , defaultMode = True
501 , timeUnit = timeScale
502 , clique = Fis (toSupport nbDocs) 3}
503 where
504 --------------------------------------
505 toSupport :: Int -> Support
506 toSupport n
507 | n < 500 = 1
508 | n < 1000 = 2
509 | n < 2000 = 3
510 | n < 3000 = 4
511 | n < 5000 = 5
512 | otherwise = 6
513 --------------------------------------
514
515
516 -- Init the basic elements of a Phylo
517 --
518 initPhylo :: [Document] -> PhyloConfig -> Phylo
519 initPhylo docs conf =
520 let roots = Vector.fromList $ nub $ concat $ map text docs
521 timeScale = head' "initPhylo" $ map docTime docs
522 foundations = PhyloFoundations roots empty
523 docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
524 docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
525 (docsToTimeScaleNb docs)
526 (docsToTimeTermCount docs (foundations ^. foundations_roots))
527 (docsToTermCount docs (foundations ^. foundations_roots))
528 (docsToTermFreq docs (foundations ^. foundations_roots))
529 (docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots))
530 params = if (defaultMode conf)
531 then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
532 else defaultPhyloParam { _phyloParam_config = conf }
533 periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
534 in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
535 $ trace ("\n" <> "-- | lambda " <> show(_qua_granularity $ phyloQuality $ _phyloParam_config params))
536 $ Phylo foundations
537 docsSources
538 docsCounts
539 []
540 params
541 (fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
542 0
543 (_qua_granularity $ phyloQuality $ _phyloParam_config params)