]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
[CLEAN] readFile homonyms
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE ViewPatterns #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloTools where
14
15 import Control.Lens hiding (Level)
16 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
17 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
18 import Data.Set (Set, disjoint)
19 import Data.String (String)
20 import Data.Text (Text,unpack)
21 import Data.Vector (Vector, elemIndex)
22 import Debug.Trace (trace)
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Prelude
25 import Prelude (floor,read)
26 import Text.Printf
27 import qualified Data.List as List
28 import qualified Data.Map as Map
29 import qualified Data.Set as Set
30 import qualified Data.Text as Text
31 import qualified Data.Vector as Vector
32
33 ------------
34 -- | Io | --
35 ------------
36
37 -- | To print an important message as an IO()
38 printIOMsg :: String -> IO ()
39 printIOMsg msg =
40 putStrLn ( "\n"
41 <> "------------"
42 <> "\n"
43 <> "-- | " <> msg <> "\n" )
44
45
46 -- | To print a comment as an IO()
47 printIOComment :: String -> IO ()
48 printIOComment cmt =
49 putStrLn ( "\n" <> cmt <> "\n" )
50
51
52 --------------
53 -- | Misc | --
54 --------------
55
56 -- truncate' :: Double -> Int -> Double
57 -- truncate' x n = (fromIntegral (floor (x * t))) / t
58 -- where t = 10^n
59
60 truncate' :: Double -> Int -> Double
61 truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
62 where
63 --------------
64 t :: Double
65 t = 10 ^n
66
67 getInMap :: Int -> Map Int Double -> Double
68 getInMap k m =
69 if (member k m)
70 then m ! k
71 else 0
72
73 roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
74 roundToStr = printf "%0.*f"
75
76
77 countSup :: Double -> [Double] -> Int
78 countSup s l = length $ filter (>s) l
79
80
81 dropByIdx :: Int -> [a] -> [a]
82 dropByIdx k l = take k l ++ drop (k+1) l
83
84
85 elemIndex' :: Eq a => a -> [a] -> Int
86 elemIndex' e l = case (List.elemIndex e l) of
87 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
88 Just i -> i
89
90
91 commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
92 commonPrefix lst lst' acc =
93 if (null lst || null lst')
94 then acc
95 else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
96 then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
97 else acc
98
99
100 ---------------------
101 -- | Foundations | --
102 ---------------------
103
104
105 -- | Is this Ngrams a Foundations Root ?
106 isRoots :: Ngrams -> Vector Ngrams -> Bool
107 isRoots n ns = Vector.elem n ns
108
109 -- | To transform a list of nrams into a list of foundation's index
110 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
111 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
112
113 -- | To transform a list of sources into a list of sources' index
114 sourcesToIdx :: [Text] -> Vector Text -> [Int]
115 sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
116
117 -- | To transform a list of Ngrams Indexes into a Label
118 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
119 ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
120
121 idxToLabel :: [Int] -> String
122 idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
123
124 idxToLabel' :: [Double] -> String
125 idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
126
127 -- | To transform a list of Ngrams Indexes into a list of Text
128 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
129 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
130
131
132 --------------
133 -- | Time | --
134 --------------
135
136 -- | To transform a list of periods into a set of Dates
137 periodsToYears :: [(Date,Date)] -> Set Date
138 periodsToYears periods = (Set.fromList . sort . concat)
139 $ map (\(d,d') -> [d..d']) periods
140
141
142 findBounds :: [Date] -> (Date,Date)
143 findBounds [] = panic "[G.C.V.P.PhyloTools] empty dates for find bounds"
144 findBounds dates =
145 let dates' = sort dates
146 in (head' "findBounds" dates', last' "findBounds" dates')
147
148
149 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
150 toPeriods dates p s =
151 let (start,end) = findBounds dates
152 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
153 $ chunkAlong p s [start .. end]
154
155
156 toFstDate :: [Text] -> Text
157 toFstDate ds = snd
158 $ head' "firstDate"
159 $ sortOn fst
160 $ map (\d ->
161 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
162 in (d',d)) ds
163
164 toLstDate :: [Text] -> Text
165 toLstDate ds = snd
166 $ head' "firstDate"
167 $ reverse
168 $ sortOn fst
169 $ map (\d ->
170 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
171 in (d',d)) ds
172
173
174 getTimeScale :: Phylo -> [Char]
175 getTimeScale p = case (timeUnit $ getConfig p) of
176 Epoch _ _ _ -> "epoch"
177 Year _ _ _ -> "year"
178 Month _ _ _ -> "month"
179 Week _ _ _ -> "week"
180 Day _ _ _ -> "day"
181
182
183 -- | Get a regular & ascendante timeScale from a given list of dates
184 toTimeScale :: [Date] -> Int -> [Date]
185 toTimeScale dates step =
186 let (start,end) = findBounds dates
187 in [start, (start + step) .. end]
188
189
190 getTimeStep :: TimeUnit -> Int
191 getTimeStep time = case time of
192 Epoch _ s _ -> s
193 Year _ s _ -> s
194 Month _ s _ -> s
195 Week _ s _ -> s
196 Day _ s _ -> s
197
198 getTimePeriod :: TimeUnit -> Int
199 getTimePeriod time = case time of
200 Epoch p _ _ -> p
201 Year p _ _ -> p
202 Month p _ _ -> p
203 Week p _ _ -> p
204 Day p _ _ -> p
205
206 getTimeFrame :: TimeUnit -> Int
207 getTimeFrame time = case time of
208 Epoch _ _ f -> f
209 Year _ _ f -> f
210 Month _ _ f -> f
211 Week _ _ f -> f
212 Day _ _ f -> f
213
214 -------------
215 -- | Fis | --
216 -------------
217
218
219 -- | To find if l' is nested in l
220 isNested :: Eq a => [a] -> [a] -> Bool
221 isNested l l'
222 | null l' = True
223 | length l' > length l = False
224 | (union l l') == l = True
225 | otherwise = False
226
227
228 -- | To filter Fis with small Support but by keeping non empty Periods
229 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
230 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
231 then keepFilled f (thr - 1) l
232 else f thr l
233
234
235 traceClique :: Map (Date, Date) [PhyloClique] -> String
236 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
237 where
238 --------------------------------------
239 cliques :: [Double]
240 cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
241 --------------------------------------
242
243
244 traceSupport :: Map (Date, Date) [PhyloClique] -> String
245 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
246 where
247 --------------------------------------
248 supports :: [Double]
249 supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
250 --------------------------------------
251
252
253 traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
254 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
255 <> "Support : " <> (traceSupport mFis) <> "\n"
256 <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
257
258
259 ---------------
260 -- | Clique| --
261 ---------------
262
263
264 getCliqueSupport :: Clique -> Int
265 getCliqueSupport unit = case unit of
266 Fis s _ -> s
267 MaxClique _ _ _ -> 0
268
269 getCliqueSize :: Clique -> Int
270 getCliqueSize unit = case unit of
271 Fis _ s -> s
272 MaxClique s _ _ -> s
273
274
275 --------------
276 -- | Cooc | --
277 --------------
278
279 listToCombi' :: [a] -> [(a,a)]
280 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
281
282 listToEqual' :: Eq a => [a] -> [(a,a)]
283 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
284
285 listToKeys :: Eq a => [a] -> [(a,a)]
286 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
287
288 listToMatrix :: [Int] -> Map (Int,Int) Double
289 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
290
291 listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
292 listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
293
294 listToSeq :: Eq a => [a] -> [(a,a)]
295 listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
296
297 sumCooc :: Cooc -> Cooc -> Cooc
298 sumCooc cooc cooc' = unionWith (+) cooc cooc'
299
300 getTrace :: Cooc -> Double
301 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
302
303 coocToDiago :: Cooc -> Cooc
304 coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
305
306 -- | To build the local cooc matrix of each phylogroup
307 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
308 ngramsToCooc ngrams coocs =
309 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
310 pairs = listToKeys ngrams
311 in filterWithKey (\k _ -> elem k pairs) cooc
312
313
314 --------------------
315 -- | PhyloGroup | --
316 --------------------
317
318 getGroupId :: PhyloGroup -> PhyloGroupId
319 getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex)
320
321 idToPrd :: PhyloGroupId -> PhyloPeriodId
322 idToPrd id = (fst . fst) id
323
324 groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
325 groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
326
327 getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
328 getPeriodPointers fil g =
329 case fil of
330 ToChilds -> g ^. phylo_groupPeriodChilds
331 ToParents -> g ^. phylo_groupPeriodParents
332 ToChildsMemory -> undefined
333 ToParentsMemory -> undefined
334
335 filterProximity :: Proximity -> Double -> Double -> Bool
336 filterProximity proximity thr local =
337 case proximity of
338 WeightedLogJaccard _ -> local >= thr
339 WeightedLogSim _ -> local >= thr
340 Hamming _ -> undefined
341
342 getProximityName :: Proximity -> String
343 getProximityName proximity =
344 case proximity of
345 WeightedLogJaccard _ -> "WLJaccard"
346 WeightedLogSim _ -> "WeightedLogSim"
347 Hamming _ -> "Hamming"
348
349 ---------------
350 -- | Phylo | --
351 ---------------
352
353 addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
354 addPointers fil pty pointers g =
355 case pty of
356 TemporalPointer -> case fil of
357 ToChilds -> g & phylo_groupPeriodChilds .~ pointers
358 ToParents -> g & phylo_groupPeriodParents .~ pointers
359 ToChildsMemory -> undefined
360 ToParentsMemory -> undefined
361 LevelPointer -> case fil of
362 ToChilds -> g & phylo_groupLevelChilds .~ pointers
363 ToParents -> g & phylo_groupLevelParents .~ pointers
364 ToChildsMemory -> undefined
365 ToParentsMemory -> undefined
366
367 toPointer' :: Double -> Pointer -> Pointer'
368 toPointer' thr pt = (fst pt,(thr,snd pt))
369
370
371 addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
372 addMemoryPointers fil pty thr pointers g =
373 case pty of
374 TemporalPointer -> case fil of
375 ToChilds -> undefined
376 ToParents -> undefined
377 ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
378 ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
379 LevelPointer -> undefined
380
381
382 getPeriodIds :: Phylo -> [(Date,Date)]
383 getPeriodIds phylo = sortOn fst
384 $ keys
385 $ phylo ^. phylo_periods
386
387 getLevelParentId :: PhyloGroup -> PhyloGroupId
388 getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents
389
390 getLastLevel :: Phylo -> Level
391 getLastLevel phylo = last' "lastLevel" $ getLevels phylo
392
393 getLevels :: Phylo -> [Level]
394 getLevels phylo = nub
395 $ map snd
396 $ keys $ view ( phylo_periods
397 . traverse
398 . phylo_periodLevels ) phylo
399
400 getSeaElevation :: Phylo -> SeaElevation
401 getSeaElevation phylo = seaElevation (getConfig phylo)
402
403
404 getConfig :: Phylo -> PhyloConfig
405 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
406
407
408 setConfig :: PhyloConfig -> Phylo -> Phylo
409 setConfig config phylo = phylo
410 & phylo_param .~ (PhyloParam
411 ((phylo ^. phylo_param) ^. phyloParam_version)
412 ((phylo ^. phylo_param) ^. phyloParam_software)
413 config)
414
415 -- & phylo_param & phyloParam_config & phyloParam_config .~ config
416
417
418 getRoots :: Phylo -> Vector Ngrams
419 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
420
421 getSources :: Phylo -> Vector Text
422 getSources phylo = _sources (phylo ^. phylo_sources)
423
424 phyloToLastBranches :: Phylo -> [[PhyloGroup]]
425 phyloToLastBranches phylo = elems
426 $ fromListWith (++)
427 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
428 $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
429
430 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
431 getGroupsFromLevel lvl phylo =
432 elems $ view ( phylo_periods
433 . traverse
434 . phylo_periodLevels
435 . traverse
436 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
437 . phylo_levelGroups ) phylo
438
439
440 getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
441 getGroupsFromLevelPeriods lvl periods phylo =
442 elems $ view ( phylo_periods
443 . traverse
444 . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
445 . phylo_periodLevels
446 . traverse
447 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
448 . phylo_levelGroups ) phylo
449
450
451 getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
452 getGroupsFromPeriods lvl periods =
453 elems $ view ( traverse
454 . phylo_periodLevels
455 . traverse
456 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
457 . phylo_levelGroups ) periods
458
459
460 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
461 updatePhyloGroups lvl m phylo =
462 over ( phylo_periods
463 . traverse
464 . phylo_periodLevels
465 . traverse
466 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
467 . phylo_levelGroups
468 . traverse
469 ) (\g ->
470 let id = getGroupId g
471 in
472 if member id m
473 then m ! id
474 else g ) phylo
475
476 updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
477 updatePeriods periods' phylo =
478 over (phylo_periods . traverse)
479 (\prd ->
480 let prd' = periods' ! (prd ^. phylo_periodPeriod)
481 lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
482 in prd & phylo_periodPeriod' .~ prd'
483 & phylo_periodLevels .~ lvls
484 ) phylo
485
486
487 traceToPhylo :: Level -> Phylo -> Phylo
488 traceToPhylo lvl phylo =
489 trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
490 <> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
491 <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
492
493 --------------------
494 -- | Clustering | --
495 --------------------
496
497 mergeBranchIds :: [[Int]] -> [Int]
498 mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
499 where
500 -- | 2) find the most Up Left ids in the hierarchy of similarity
501 -- mostUpLeft :: [[Int]] -> [[Int]]
502 -- mostUpLeft ids' =
503 -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
504 -- inf = (fst . minimum) groupIds
505 -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
506 -- | 1) find the most frequent ids
507 mostFreq' :: [[Int]] -> [[Int]]
508 mostFreq' ids' =
509 let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
510 sup = (fst . maximum) groupIds
511 in map snd $ filter (\gIds -> fst gIds == sup) groupIds
512
513
514 mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
515 mergeMeta bId groups =
516 let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
517 in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
518
519
520 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
521 groupsToBranches groups =
522 {- run the related component algorithm -}
523 let egos = map (\g -> [getGroupId g]
524 ++ (map fst $ g ^. phylo_groupPeriodParents)
525 ++ (map fst $ g ^. phylo_groupPeriodChilds)
526 ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
527 graph = relatedComponents egos
528 {- update each group's branch id -}
529 in map (\ids ->
530 let groups' = elems $ restrictKeys groups (Set.fromList ids)
531 bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
532 in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
533
534 relatedComponents :: Ord a => [[a]] -> [[a]]
535 relatedComponents graph = foldl' (\acc groups ->
536 if (null acc)
537 then acc ++ [groups]
538 else
539 let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
540 in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
541
542 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
543 toRelatedComponents nodes edges =
544 let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
545 clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
546 in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
547
548
549 traceSynchronyEnd :: Phylo -> Phylo
550 traceSynchronyEnd phylo =
551 trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
552 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
553 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
554 <> "\n" ) phylo
555
556 traceSynchronyStart :: Phylo -> Phylo
557 traceSynchronyStart phylo =
558 trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
559 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
560 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
561 <> "\n" ) phylo
562
563
564 -------------------
565 -- | Proximity | --
566 -------------------
567
568 getSensibility :: Proximity -> Double
569 getSensibility proxi = case proxi of
570 WeightedLogJaccard s -> s
571 WeightedLogSim s -> s
572 Hamming _ -> undefined
573
574 ----------------
575 -- | Branch | --
576 ----------------
577
578 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
579 intersectInit acc lst lst' =
580 if (null lst) || (null lst')
581 then acc
582 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
583 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
584 else acc
585
586 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
587 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
588
589 ngramsInBranches :: [[PhyloGroup]] -> [Int]
590 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
591
592
593 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
594 traceMatchSuccess thr qua qua' nextBranches =
595 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
596 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
597 <> ",(1.." <> show (length nextBranches) <> ")]"
598 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
599 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
600 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
601
602
603 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
604 traceMatchFailure thr qua qua' branches =
605 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
606 <> ",(1.." <> show (length branches) <> ")]"
607 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
608 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
609 ) branches
610
611
612 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
613 traceMatchNoSplit branches =
614 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
615 <> ",(1.." <> show (length branches) <> ")]"
616 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
617 <> " - unable to split in smaller branches" <> "\n"
618 ) branches
619
620
621 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
622 traceMatchLimit branches =
623 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
624 <> ",(1.." <> show (length branches) <> ")]"
625 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
626 <> " - unable to increase the threshold above 1" <> "\n"
627 ) branches
628
629
630 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
631 traceMatchEnd groups =
632 trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
633 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups
634
635
636 traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
637 traceTemporalMatching groups =
638 trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
639
640
641 traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
642 traceGroupsProxi m =
643 trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m