]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
maybe fix the phylo issue
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloMaker.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloMaker
3 Description : Maker engine for rebuilding a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11
12 module Gargantext.Core.Viz.Phylo.PhyloMaker where
13
14
15 import Control.DeepSeq (NFData)
16 import Control.Lens hiding (Level)
17 import Control.Parallel.Strategies (parList, rdeepseq, using)
18 import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
19 import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert)
20 import Data.Set (Set)
21 import Data.Text (Text)
22 import Data.Vector (Vector)
23 import Debug.Trace (trace)
24 import Prelude (floor)
25
26 import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
27 import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
28 import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
29 import Gargantext.Core.Viz.Phylo
30 import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
31 import Gargantext.Core.Viz.Phylo.PhyloTools
32 import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
33 import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
34 import Gargantext.Prelude
35
36 import qualified Data.Set as Set
37 import qualified Data.Map as Map
38 import qualified Data.List as List
39 import qualified Data.Vector as Vector
40
41 ------------------
42 -- | To Phylo | --
43 ------------------
44
45 {-
46 -- TODO AD
47 data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
48 | PhyloN { _phylo'_flatPhylo :: Phylo}
49
50
51 toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
52 toPhylo' (PhyloN phylo) = toPhylo'
53 toPhylo' (PhyloBase phylo) = toPhylo
54 -}
55
56 -- TODO an adaptative synchronic clustering with a slider
57
58 toPhylo :: Phylo -> Phylo
59 toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
60 if (phyloScale $ getConfig phylowithoutLink) > 1
61 then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
62 else phyloAncestors
63 where
64 --------------------------------------
65 phyloAncestors :: Phylo
66 phyloAncestors =
67 if (findAncestors $ getConfig phylowithoutLink)
68 then toHorizon phyloWithLinks
69 else phyloWithLinks
70 --------------------------------------
71 phyloWithLinks :: Phylo
72 phyloWithLinks = temporalMatching (getLadder phylowithoutLink) phylowithoutLink
73 --------------------------------------
74
75
76 -----------------------------
77 -- | Create a flat Phylo | --
78 -----------------------------
79
80
81 {-
82 -- create a square ladder
83 -}
84 squareLadder :: [Double] -> [Double]
85 squareLadder ladder = List.map (\x -> x * x) ladder
86
87
88 {-
89 -- create an adaptative 'sea elevation' ladder
90 -}
91 adaptSeaLadder :: Double -> Set Double -> Set Double -> [Double]
92 adaptSeaLadder curr similarities ladder =
93 if curr <= 0 || Set.null similarities
94 then Set.toList ladder
95 else
96 let idx = ((Set.size similarities) `div` (floor curr)) - 1
97 thr = Set.elemAt idx similarities
98 -- we use a sliding methods 1/10, then 1/9, then ... 1/2
99 in adaptSeaLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
100
101
102 {-
103 -- create a constante 'sea elevation' ladder
104 -}
105 constSeaLadder :: Double -> Double -> Set Double -> [Double]
106 constSeaLadder curr step ladder =
107 if curr > 1
108 then Set.toList ladder
109 else constSeaLadder (curr + step) step (Set.insert curr ladder)
110
111
112
113 {-
114 -- create an evolving 'sea elevation' ladder based on estimated & local quality maxima
115 -}
116 evolvSeaLadder :: Double -> Double -> Map Int Double -> Set Double -> [((PhyloGroup,PhyloGroup),Double)] -> [Double]
117 evolvSeaLadder nbFdt lambda freq similarities graph = map snd
118 $ filter fst
119 $ zip maxima (map fst qua')
120 -- 3) find the corresponding measures of similarity and create the ladder
121 where
122 --------
123 -- 2) find the local maxima in the quality distribution
124 maxima :: [Bool]
125 maxima = [snd (List.head qua') > snd (List.head $ List.tail qua')] ++ (findMaxima qua') ++ [snd (List.head $ reverse qua') > snd (List.head $ List.tail $ reverse qua')]
126 --------
127 -- 1.2)
128 qua' :: [(Double,Double)]
129 qua' = foldl (\acc (s,q) ->
130 if length acc == 0
131 then [(s,q)]
132 else if (snd (List.last acc)) == q
133 then acc
134 else acc ++ [(s,q)]
135 ) [] $ zip (Set.toList similarities) qua
136 --------
137 -- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
138 qua :: [Double]
139 qua = map (\thr ->
140 let edges = filter (\edge -> snd edge >= thr) graph
141 nodes = nub $ concat $ map (\((n,n'),_) -> [n,n']) edges
142 branches = toRelatedComponents nodes edges
143 in toPhyloQuality nbFdt lambda freq branches
144 ) $ (Set.toList similarities)
145
146
147 {-
148 -- find a similarity ladder regarding the "sea elevation" strategy
149 -}
150 findSeaLadder :: Phylo -> Phylo
151 findSeaLadder phylo = case getSeaElevation phylo of
152 Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty)
153 Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty)
154 Evolving _ -> let ladder = evolvSeaLadder
155 (fromIntegral $ Vector.length $ getRoots phylo)
156 (getLevel phylo)
157 (getRootsFreq phylo)
158 similarities simGraph
159 in phylo & phylo_seaLadder .~ (if length ladder > 0
160 then ladder
161 -- if we don't find any local maxima with the evolving strategy
162 else constSeaLadder 0.1 0.1 Set.empty)
163 where
164 --------
165 -- 2) extract the values of the kinship links
166 similarities :: Set Double
167 similarities = Set.fromList $ sort $ map snd simGraph
168 --------
169 -- 1) we process an initial calculation of the kinship links
170 -- this initial calculation is used to estimate the real sea ladder
171 simGraph :: [((PhyloGroup,PhyloGroup),Double)]
172 simGraph = foldl' (\acc period ->
173 -- 1.1) process period by period
174 let sources = getGroupsFromScalePeriods 1 [period] phylo
175 next = getNextPeriods ToParents 3 period (keys $ phylo ^. phylo_periods)
176 targets = getGroupsFromScalePeriods 1 next phylo
177 docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
178 diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
179 -- 1.2) compute the kinship similarities between pairs of source & target in parallel
180 pairs = map (\source ->
181 let candidates = filter (\target -> (> 2) $ length
182 $ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
183 in map (\target ->
184 let nbDocs = (sum . elems)
185 $ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
186 diago = reduceDiagos
187 $ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
188 in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target))
189 ) candidates
190 ) sources
191 pairs' = pairs `using` parList rdeepseq
192 in acc ++ (concat pairs')
193 ) [] $ keys $ phylo ^. phylo_periods
194
195 appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
196 appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to scale " <> show (lvl) <> "\n")
197 $ over ( phylo_periods
198 . traverse
199 . phylo_periodScales
200 . traverse)
201 (\phyloLvl -> if lvl == (phyloLvl ^. phylo_scaleScale)
202 then
203 let pId = phyloLvl ^. phylo_scalePeriod
204 pId' = phyloLvl ^. phylo_scalePeriodStr
205 phyloCUnit = m ! pId
206 in phyloLvl
207 & phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
208 groups ++ [ (((pId,lvl),length groups)
209 , f obj pId pId' lvl (length groups)
210 -- select the cooc of the periods
211 (elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])
212 -- select and merge the roots count of the periods
213 (foldl (\acc count -> unionWith (+) acc count) empty
214 $ elems $ restrictKeys (getRootsCountByDate phylo) $ periodsToYears [pId]))
215 ] ) [] phyloCUnit)
216 else
217 phyloLvl )
218 phylo
219
220
221 clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup
222 clusterToGroup fis pId pId' lvl idx coocs rootsCount = PhyloGroup pId pId' lvl idx ""
223 (fis ^. clustering_support )
224 (fis ^. clustering_visWeighting)
225 (fis ^. clustering_visFiltering)
226 (fis ^. clustering_roots)
227 (ngramsToCooc (fis ^. clustering_roots) coocs)
228 (ngramsToDensity (fis ^. clustering_roots) coocs rootsCount)
229 (1,[0]) -- branchid (lvl,[path in the branching tree])
230 (fromList [("breaks",[0]),("seaLevels",[0])])
231 rootsCount [] [] [] [] [] [] []
232
233
234 -----------------------
235 -- | To Phylo Step | --
236 -----------------------
237
238
239 indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
240 indexDates' m = map (\docs ->
241 let ds = map (\d -> date' d) docs
242 f = if (null ds)
243 then ""
244 else toFstDate ds
245 l = if (null ds)
246 then ""
247 else toLstDate ds
248 in (f,l)) m
249
250
251 -- create a map of roots and group ids
252 joinRoots :: Phylo -> Phylo
253 joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap phylo
254 where
255 --------------------------------------
256 rootsMap :: Map Int [PhyloGroupId]
257 rootsMap = fromListWith (++)
258 $ concat -- flatten
259 $ map (\g ->
260 map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
261 $ getGroupsFromScale 1 phylo
262
263
264 maybeDefaultParams :: Phylo -> Phylo
265 maybeDefaultParams phylo = if (defaultMode (getConfig phylo))
266 then findDefaultLevel phylo
267 else phylo
268
269
270 -- To build the first phylo step from docs and terms
271 -- QL: backend entre phyloBase et Clustering
272 -- tophylowithoutLink
273 toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo
274 toPhyloWithoutLink docs conf = joinRoots
275 $ findSeaLadder
276 $ maybeDefaultParams
277 $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
278 where
279 --------------------------------------
280 seriesOfClustering :: Map (Date,Date) [Clustering]
281 seriesOfClustering = toSeriesOfClustering phyloBase docs'
282 --------------------------------------
283 docs' :: Map (Date,Date) [Document]
284 -- QL: Time Consuming here
285 docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
286 --------------------------------------
287 phyloBase :: Phylo
288 phyloBase = initPhylo docs conf
289 --------------------------------------
290
291 ---------------------------
292 -- | Frequent Item Set | --
293 ---------------------------
294
295
296 -- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
297 filterClique :: Bool -> Int -> (Int -> [Clustering] -> [Clustering]) -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
298 filterClique keep thr f m = case keep of
299 False -> map (\l -> f thr l) m
300 True -> map (\l -> keepFilled (f) thr l) m
301
302
303 -- To filter Fis with small Support
304 filterCliqueBySupport :: Int -> [Clustering] -> [Clustering]
305 filterCliqueBySupport thr l = filter (\clq -> (clq ^. clustering_support ) >= thr) l
306
307
308 -- To filter Fis with small Clique size
309 filterCliqueBySize :: Int -> [Clustering] -> [Clustering]
310 filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= thr) l
311
312
313 -- To filter nested Fis
314 filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
315 filterCliqueByNested m =
316 let clq = map (\l ->
317 foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
318 then mem
319 else
320 let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
321 in fMax ++ [f] ) [] l)
322 $ elems m
323 clq' = clq `using` parList rdeepseq
324 in fromList $ zip (keys m) clq'
325
326
327 -- | To transform a time map of docs into a time map of Fis with some filters
328 toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
329 toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
330 Fis s s' -> -- traceFis "Filtered Fis"
331 filterCliqueByNested
332 {- \$ traceFis "Filtered by clique size" -}
333 $ filterClique True s' (filterCliqueBySize)
334 {- \$ traceFis "Filtered by support" -}
335 $ filterClique True s (filterCliqueBySupport)
336 {- \$ traceFis "Unfiltered Fis" -}
337 seriesOfClustering
338 MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
339 seriesOfClustering
340 where
341 --------------------------------------
342 seriesOfClustering :: Map (Date,Date) [Clustering]
343 seriesOfClustering = case (clique $ getConfig phylo) of
344 Fis _ _ ->
345 let fis = map (\(prd,docs) ->
346 case (corpusParser $ getConfig phylo) of
347 Csv' _ -> let lst = toList
348 $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
349 in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
350 _ -> let lst = toList
351 $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
352 in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
353 )
354 $ toList phyloDocs
355 fis' = fis `using` parList rdeepseq
356 in fromList fis'
357 MaxClique _ thr filterType ->
358 let mcl = map (\(prd,docs) ->
359 let cooc = map round
360 $ foldl sumCooc empty
361 $ map listToMatrix
362 $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
363 in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
364 $ toList phyloDocs
365 mcl' = mcl `using` parList rdeepseq
366 in fromList mcl'
367 --------------------------------------
368
369 -- dev viz graph maxClique getMaxClique
370
371
372 --------------------
373 -- | Coocurency | --
374 --------------------
375
376
377 -- To transform the docs into a time map of coocurency matrix
378 docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
379 docsToTimeScaleCooc docs fdt =
380 let mCooc = fromListWith sumCooc
381 $ map (\(_d,l) -> (_d, listToMatrix l))
382 $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
383 mCooc' = fromList
384 $ map (\t -> (t,empty))
385 $ toTimeScale (map date docs) 1
386 in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
387 $ unionWith sumCooc mCooc mCooc'
388
389
390 -----------------------
391 -- | to Phylo Base | --
392 -----------------------
393
394 -- TODO anoe
395 groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
396 groupDocsByPeriodRec f prds docs acc =
397 if ((null prds) || (null docs))
398 then acc
399 else
400 let prd = head' "groupBy" prds
401 docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
402 in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
403
404
405 -- To group a list of Documents by fixed periods
406 groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
407 groupDocsByPeriod' f pds docs =
408 let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
409 periods = map (inPeriode f docs') pds
410 periods' = periods `using` parList rdeepseq
411 in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n")
412 $ fromList $ zip pds periods'
413 where
414 --------------------------------------
415 inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
416 inPeriode f' h (start,end) =
417 concat $ fst $ partition (\d -> f' (head' "inPeriode" d) >= start && f' (head' "inPeriode" d) <= end) h
418
419
420
421 -- To group a list of Documents by fixed periods
422 groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
423 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
424 groupDocsByPeriod f pds es =
425 let periods = map (inPeriode f es) pds
426 periods' = periods `using` parList rdeepseq
427
428 in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
429 $ fromList $ zip pds periods'
430 where
431 --------------------------------------
432 inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
433 inPeriode f' h (start,end) =
434 fst $ partition (\d -> f' d >= start && f' d <= end) h
435 --------------------------------------
436
437
438 docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
439 docsToTermFreq docs fdt =
440 let nbDocs = fromIntegral $ length docs
441 freqs = map (/(nbDocs))
442 $ fromList
443 $ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
444 $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
445 sumFreqs = sum $ elems freqs
446 in map (/sumFreqs) freqs
447
448
449 docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double
450 docsToTermCount docs roots = fromList
451 $ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst))
452 $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) roots) docs
453
454
455
456 docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double))
457 docsToTimeTermCount docs roots =
458 let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst))
459 $ group $ sort l)
460 $ fromListWith (++)
461 $ map (\d -> (date d, nub $ ngramsToIdx (text d) roots)) docs
462 time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1
463 in unionWith (Map.union) time docs'
464
465
466 docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
467 docsToLastTermFreq n docs fdt =
468 let last = take n $ reverse $ sort $ map date docs
469 nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
470 freqs = map (/(nbDocs))
471 $ fromList
472 $ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
473 $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
474 sumFreqs = sum $ elems freqs
475 in map (/sumFreqs) freqs
476
477
478 -- To count the number of docs by unit of time
479 docsToTimeScaleNb :: [Document] -> Map Date Double
480 docsToTimeScaleNb docs =
481 let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
482 time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
483 in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
484 $ unionWith (+) time docs'
485
486
487 initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
488 initPhyloScales lvlMax pId =
489 fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
490
491
492
493 setDefault :: PhyloConfig -> TimeUnit -> Int -> PhyloConfig
494 setDefault conf timeScale nbDocs = defaultConfig
495 { corpusPath = (corpusPath conf)
496 , listPath = (listPath conf)
497 , outputPath = (outputPath conf)
498 , corpusParser = (corpusParser conf)
499 , listParser = (listParser conf)
500 , phyloName = (phyloName conf)
501 , defaultMode = True
502 , timeUnit = timeScale
503 , clique = Fis (toSupport nbDocs) 3}
504 where
505 --------------------------------------
506 toSupport :: Int -> Support
507 toSupport n
508 | n < 500 = 1
509 | n < 1000 = 2
510 | n < 2000 = 3
511 | n < 3000 = 4
512 | n < 5000 = 5
513 | otherwise = 6
514 --------------------------------------
515
516
517 -- Init the basic elements of a Phylo
518 --
519 initPhylo :: [Document] -> PhyloConfig -> Phylo
520 initPhylo docs conf =
521 let roots = Vector.fromList $ nub $ concat $ map text docs
522 timeScale = head' "initPhylo" $ map docTime docs
523 foundations = PhyloFoundations roots empty
524 docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
525 docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
526 (docsToTimeScaleNb docs)
527 (docsToTimeTermCount docs (foundations ^. foundations_roots))
528 (docsToTermCount docs (foundations ^. foundations_roots))
529 (docsToTermFreq docs (foundations ^. foundations_roots))
530 (docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots))
531 params = if (defaultMode conf)
532 then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
533 else defaultPhyloParam { _phyloParam_config = conf }
534 periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
535 in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
536 $ trace ("\n" <> "-- | lambda " <> show(_qua_granularity $ phyloQuality $ _phyloParam_config params))
537 $ Phylo foundations
538 docsSources
539 docsCounts
540 []
541 params
542 (fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
543 0
544 (_qua_granularity $ phyloQuality $ _phyloParam_config params)