]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
[MERGE]
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE ViewPatterns #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloTools where
14
15 import Control.Lens hiding (Level)
16 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
17 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
18 import Data.Set (Set, disjoint)
19 import Data.String (String)
20 import Data.Text (Text,unpack)
21 import Data.Vector (Vector, elemIndex)
22 import Debug.Trace (trace)
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Prelude
25 import Prelude (floor,read)
26 import Text.Printf
27 import qualified Data.List as List
28 import qualified Data.Map as Map
29 import qualified Data.Set as Set
30 import qualified Data.Text as Text
31 import qualified Data.Vector as Vector
32
33 ------------
34 -- | Io | --
35 ------------
36
-- | Print @msg@ as a banner: a newline, a dashed rule, then the message
-- prefixed with @-- | @ and terminated by a newline.
printIOMsg :: String -> IO ()
printIOMsg msg = putStrLn banner
  where
    -- Same pieces, in the same order, as the banner has always been built from.
    banner :: String
    banner = concat ["\n", "------------", "\n", "-- | ", msg, "\n"]
44
45
-- | Print the comment @cmt@ surrounded by a leading and a trailing newline.
printIOComment :: String -> IO ()
printIOComment cmt = putStrLn padded
  where
    padded :: String
    padded = concat ["\n", cmt, "\n"]
50
51
52 --------------
53 -- | Misc | --
54 --------------
55
56 -- truncate' :: Double -> Int -> Double
57 -- truncate' x n = (fromIntegral (floor (x * t))) / t
58 -- where t = 10^n
59
-- | Truncate @x@ to @n@ decimal places.
--
-- The value is scaled by @10 ^ n@, passed through 'floor' (so it is rounded
-- towards negative infinity) and scaled back.
truncate' :: Double -> Int -> Double
truncate' x n = fromIntegral scaled / factor
  where
    -- Power of ten that shifts the wanted decimals left of the point.
    factor :: Double
    factor = 10 ^ n

    -- Integral part of the shifted value, computed through 'Int'.
    scaled :: Int
    scaled = floor (x * factor)
66
-- | Look up the key @k@ in @m@, answering @0@ when the key is absent.
getInMap :: Int -> Map Int Double -> Double
getInMap k m = Map.findWithDefault 0 k m
72
-- | Render @value@ with the @printf@ format @%0.*f@, where @digits@ fills the
-- @*@ (the number of digits printed after the decimal point).
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr digits value = printf "%0.*f" digits value
75
76
-- | Count the elements of @l@ that are strictly greater than the threshold @s@.
countSup :: Double -> [Double] -> Int
countSup s l = length [x | x <- l, x > s]
79
80
-- | Remove the element at position @k@ (0-based) from @l@.
--
-- The result is the first @k@ elements followed by everything after position
-- @k@; an index outside the list leaves the list unchanged.
dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = before ++ after
  where
    before = take k l
    after = drop (k + 1) l
83
84
-- | Position of the first occurrence of @e@ in @l@.
--
-- Calls 'panic' when @e@ does not occur in @l@.
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = orPanic (List.elemIndex e l)
  where
    orPanic (Just i) = i
    orPanic Nothing = panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
89
90
-- | Extend @acc@ with the longest common prefix of @lst@ and @lst'@.
--
-- Both lists are walked in lockstep for as long as their heads are equal; the
-- matching elements are appended to @acc@ in their original order.  The walk
-- stops at the first mismatch or as soon as either list is exhausted.
--
-- The shared prefix is built first and appended to @acc@ once, instead of
-- growing the accumulator with @acc ++ [x]@ at every step (which is quadratic
-- in the length of the prefix).  Pattern matching also removes the need for
-- the partial 'tail'.
commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
commonPrefix lst lst' acc = acc ++ shared lst lst'
  where
    shared (x : xs) (y : ys)
      | x == y = x : shared xs ys
    shared _ _ = []
98
99
100 ---------------------
101 -- | Foundations | --
102 ---------------------
103
104
-- | Is the ngram @n@ one of the elements of the vector @ns@?
isRoots :: Ngrams -> Vector Ngrams -> Bool
isRoots n ns = n `Vector.elem` ns
108
-- | Replace every ngram of @ns@ by its index in the vector @fdt@.
--
-- The lookup result is unwrapped with 'fromJust', so an ngram that is missing
-- from @fdt@ is not handled here.
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns fdt = [fromJust (elemIndex n fdt) | n <- ns]
112
-- | Replace every source of @ss@ by its index in the vector @ps@ and remove
-- duplicate indexes with 'nub'.
--
-- The lookup result is unwrapped with 'fromJust', so a source that is missing
-- from @ps@ is not handled here.
sourcesToIdx :: [Text] -> Vector Text -> [Int]
sourcesToIdx ss ps = nub [fromJust (elemIndex s ps) | s <- ss]
116
-- | Build a label from a list of ngram indexes.
--
-- Each index is resolved with 'ngramsToText'; every resolved text is preceded
-- by a @|@ token, the very first token is dropped with @tail'@ and the
-- remaining tokens are joined by 'Text.unwords'.
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = Text.unwords (tail' "ngramsToLabel" tokens)
  where
    tokens = [token | n <- ngramsToText ngrams l, token <- ["|", n]]
120
-- | Build a label from a list of indexes.
--
-- Every index is shown and preceded by a @|@ token, the very first token is
-- dropped with @tail'@ and the remaining tokens are joined by 'List.unwords'.
idxToLabel :: [Int] -> String
idxToLabel l = List.unwords (tail' "idxToLabel" tokens)
  where
    tokens = [token | n <- l, token <- ["|", show n]]
123
-- | Variant of @idxToLabel@ for 'Double' values.
--
-- Every value is shown and preceded by a @|@ token, the very first token is
-- dropped with @tail'@ and the remaining tokens are joined by 'List.unwords'.
--
-- The tag handed to @tail'@ now names this function; it used to be the
-- copy-pasted @"idxToLabel"@.  NOTE(review): the tag is presumably only used
-- in the failure message of @tail'@ - confirm against the project prelude.
idxToLabel' :: [Double] -> String
idxToLabel' l = List.unwords (tail' "idxToLabel'" tokens)
  where
    tokens = [token | n <- l, token <- ["|", show n]]
126
-- | Resolve every index of @l@ to the element stored at that position in
-- @ngrams@ (indexing is done with 'Vector.!').
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (ngrams Vector.!) l
130
131
132 --------------
133 -- | Time | --
134 --------------
135
-- | Collect, as a set, every date covered by the given periods; each period
-- @(d, d')@ contributes the whole range @[d .. d']@.
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods =
  Set.fromList [date | (from, to) <- periods, date <- [from .. to]]
140
141
-- | Smallest and largest date of @dates@, returned as @(first, last)@ of the
-- sorted list.  The elements are extracted with @head'@ and @last'@.
findBounds :: [Date] -> (Date,Date)
findBounds dates = (head' "findBounds" ordered, last' "findBounds" ordered)
  where
    ordered = sort dates
146
147
-- | Cut the range spanned by @dates@ into periods.
--
-- The full range @[start .. end]@ (bounds given by @findBounds@) is split by
-- @chunkAlong p s@ and each chunk is reduced to its first and last date.
-- NOTE(review): presumably @p@ is the window length and @s@ the step -
-- confirm against @chunkAlong@.
toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
toPeriods dates p s =
  [(head' "toPeriods" chunk, last' "toPeriods" chunk) | chunk <- chunks]
  where
    (start, end) = findBounds dates
    chunks = chunkAlong p s [start .. end]
153
154
-- | Pick, among the textual dates @ds@, the one with the smallest numeric key.
--
-- The key of a date is obtained by removing the characters @U@, @T@, @C@,
-- space, @:@ and @-@ and reading what is left as an 'Int'.
toFstDate :: [Text] -> Text
toFstDate ds = snd (head' "firstDate" (sortOn fst keyed))
  where
    keyed = [(toKey d, d) | d <- ds]

    -- Numeric sort key of one textual date ('read' is partial).
    toKey :: Text -> Int
    toKey d = read [c | c <- unpack d, notElem c ['U','T','C',' ',':','-']]
162
-- | Pick, among the textual dates @ds@, the one with the largest numeric key.
--
-- The key of a date is obtained by removing the characters @U@, @T@, @C@,
-- space, @:@ and @-@ and reading what is left as an 'Int'.
--
-- The last element of the ascending sort is taken directly with @last'@
-- (the same element as the head of the reversed sort), and the tag handed to
-- it is now @"lastDate"@ instead of the copy-pasted @"firstDate"@.
-- NOTE(review): the tag is presumably only used in the failure message of
-- @last'@ - confirm against the project prelude.
toLstDate :: [Text] -> Text
toLstDate ds = snd (last' "lastDate" (sortOn fst keyed))
  where
    keyed = [(toKey d, d) | d <- ds]

    -- Numeric sort key of one textual date ('read' is partial).
    toKey :: Text -> Int
    toKey d = read [c | c <- unpack d, notElem c ['U','T','C',' ',':','-']]
171
172
-- | Lower-case name of the time unit found in the configuration of @p@.
getTimeScale :: Phylo -> [Char]
getTimeScale p = unitName (timeUnit (getConfig p))
  where
    unitName :: TimeUnit -> [Char]
    unitName Epoch {} = "epoch"
    unitName Year {} = "year"
    unitName Month {} = "month"
    unitName Week {} = "week"
    unitName Day {} = "day"
180
181
-- | Build a regular, ascending time scale from a list of dates: it starts at
-- the smallest date and advances by @step@ up to the largest date (bounds
-- given by @findBounds@).
toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step = [start, start + step .. end]
  where
    (start, end) = findBounds dates
187
188
-- | Second field of a 'TimeUnit', whatever its constructor (the step,
-- according to this function's name).
getTimeStep :: TimeUnit -> Int
getTimeStep (Epoch _ s _) = s
getTimeStep (Year _ s _) = s
getTimeStep (Month _ s _) = s
getTimeStep (Week _ s _) = s
getTimeStep (Day _ s _) = s
196
-- | First field of a 'TimeUnit', whatever its constructor (the period,
-- according to this function's name).
getTimePeriod :: TimeUnit -> Int
getTimePeriod (Epoch p _ _) = p
getTimePeriod (Year p _ _) = p
getTimePeriod (Month p _ _) = p
getTimePeriod (Week p _ _) = p
getTimePeriod (Day p _ _) = p
204
-- | Third field of a 'TimeUnit', whatever its constructor (the frame,
-- according to this function's name).
getTimeFrame :: TimeUnit -> Int
getTimeFrame (Epoch _ _ f) = f
getTimeFrame (Year _ _ f) = f
getTimeFrame (Month _ _ f) = f
getTimeFrame (Week _ _ f) = f
getTimeFrame (Day _ _ f) = f
212
213 -------------
214 -- | Fis | --
215 -------------
216
217
-- | Is @l'@ nested in @l@?
--
-- An empty @l'@ is always nested.  Otherwise @l'@ must not be longer than @l@
-- and @union l l'@ must be equal to @l@, i.e. @l'@ brings no new element.
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l' = null l' || (length l' <= length l && union l l' == l)
225
226
-- | Apply the filter @f@ with threshold @thr@, lowering the threshold one unit
-- at a time for as long as the filter empties a non-empty list.
--
-- Written for filtering Fis with a small support while keeping the periods
-- non empty.
keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled f thr l
  | null kept && not (null l) = keepFilled f (thr - 1) l
  | otherwise = kept
  where
    kept = f thr l
232
233
-- | Summarise the sizes of the clusterings' roots: for each threshold from 1
-- to 6, report how many clusterings have strictly more roots than the
-- threshold, formatted as @count (>threshold) @.
traceClique :: Map (Date, Date) [Clustering] -> String
traceClique mFis =
  concat [show (countSup cpt cliques) <> " (>" <> show cpt <> ") " | cpt <- [1 .. 6]]
  where
    -- Number of roots of every clustering, in ascending order.
    cliques :: [Double]
    cliques = sort [fromIntegral (length (_clustering_roots c)) | c <- concat (elems mFis)]
241
242
-- | Summarise the supports of the clusterings: for each threshold from 1 to 6,
-- report how many clusterings have a support strictly greater than the
-- threshold, formatted as @count (>threshold) @.
traceSupport :: Map (Date, Date) [Clustering] -> String
traceSupport mFis =
  concat [show (countSup cpt supports) <> " (>" <> show cpt <> ") " | cpt <- [1 .. 6]]
  where
    -- Support of every clustering, in ascending order.
    supports :: [Double]
    supports = sort [fromIntegral (_clustering_support c) | c <- concat (elems mFis)]
250
251
-- | Return @mFis@ unchanged while tracing a report about it: the message
-- @msg@, the total number of clusterings, and the summaries produced by
-- @traceSupport@ and @traceClique@.
traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
traceFis msg mFis = trace report mFis
  where
    total :: Int
    total = sum (map length (elems mFis))

    report :: String
    report =
      "\n" <> "-- | " <> msg <> " : " <> show total <> "\n"
        <> "Support : " <> traceSupport mFis <> "\n"
        <> "Nb Ngrams : " <> traceClique mFis <> "\n"
256
257
258 ----------------
259 -- | Cluster| --
260 ----------------
261
262
-- | Support carried by a 'Cluster': the first field of @Fis@, and @0@ for
-- @MaxClique@.
getCliqueSupport :: Cluster -> Int
getCliqueSupport (Fis s _) = s
getCliqueSupport (MaxClique _ _ _) = 0
267
-- | Size carried by a 'Cluster': the second field of @Fis@, or the first field
-- of @MaxClique@.
getCliqueSize :: Cluster -> Int
getCliqueSize (Fis _ s) = s
getCliqueSize (MaxClique s _ _) = s
272
273
274 --------------
275 -- | Cooc | --
276 --------------
277
-- | All pairs @(x, y)@ where @x@ occurs strictly before @y@ in the list, in
-- the order of the first components.
listToCombi' :: [a] -> [(a,a)]
listToCombi' [] = []
listToCombi' (x : rest) = [(x, y) | y <- rest] ++ listToCombi' rest
280
-- | For every element @x@ of the list, one pair @(x, y)@ per element @y@ of
-- the same list that is equal to @x@ (so duplicated elements yield several
-- pairs).
listToEqual' :: Eq a => [a] -> [(a,a)]
listToEqual' l = [(x, y) | x <- l, y <- filter (x ==) l]
283
-- | Pairs used as matrix keys: the ordered combinations given by
-- @listToCombi'@ followed by the equal pairs given by @listToEqual'@.
listToKeys :: Eq a => [a] -> [(a,a)]
listToKeys lst = combinations ++ diagonal
  where
    combinations = listToCombi' lst
    diagonal = listToEqual' lst
286
-- | Map every key produced by @listToKeys@ on the sorted list to the value
-- @1@.
listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList [(key, 1) | key <- listToKeys (sort lst)]
289
-- | Same as @listToMatrix@ but keyed by ngrams, with 'Int' values: every key
-- produced by @listToKeys@ on the sorted list is mapped to @1@.
listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
listToMatrix' lst = fromList [(key, 1) | key <- listToKeys (sort lst)]
292
-- | Build all pairs @(x, y)@ with @x@ occurring before @y@ in the list, then
-- keep only the first pair found for every distinct first component.
listToSeq :: Eq a => [a] -> [(a,a)]
listToSeq l = nubBy sameSource pairs
  where
    pairs = [(x, y) | (x : rest) <- tails l, y <- rest]
    sameSource a b = fst a == fst b
295
-- | Merge two cooccurrence maps, adding the values of the keys they share.
sumCooc :: Cooc -> Cooc -> Cooc
sumCooc = unionWith (+)
298
-- | Sum of the values stored on the diagonal of @cooc@, i.e. under keys whose
-- two components are equal.
getTrace :: Cooc -> Double
getTrace cooc = sum [v | ((k, k'), v) <- Map.toList cooc, k == k']
301
-- | Keep only the diagonal of @cooc@, i.e. the entries whose key has two
-- equal components.
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey onDiagonal cooc
  where
    onDiagonal (k, k') _ = k == k'
304
-- | To build the local cooc matrix of each phylogroup.
--
-- All the maps of @coocs@ are summed (values under the same key are added),
-- then the result is restricted to the keys produced by @listToKeys@ on
-- @ngrams@.
--
-- The wanted keys are held in a 'Set' and applied with 'restrictKeys' rather
-- than testing every map key against a list with 'elem', which costs one list
-- traversal per key.
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs = restrictKeys summed wanted
  where
    -- Left-to-right sum of all the matrices, starting from the empty map.
    summed = Map.unionsWith (+) coocs
    wanted = Set.fromList (listToKeys ngrams)
311
312
313 --------------------
314 -- | PhyloGroup | --
315 --------------------
316
-- | Identifier of a group, assembled from its fields as
-- @((period, scale), index)@.
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId g = ((period, scale), index)
  where
    period = g ^. phylo_groupPeriod
    scale = g ^. phylo_groupScale
    index = g ^. phylo_groupIndex
319
-- | Period component of a group identifier (the first element of its first
-- element).
idToPrd :: PhyloGroupId -> Period
idToPrd = fst . fst
322
-- | Index the groups by the value @toField@ computes for each of them; groups
-- sharing a value end up in the same list (combined with '++' by
-- 'fromListWith').
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) [(toField g, [g]) | g <- groups]
325
-- | Period pointers of a group in the requested direction: its period childs
-- for @ToChilds@, its period parents for @ToParents@.
--
-- The two memory filiations are not implemented ('undefined').
getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
getPeriodPointers ToChilds g = g ^. phylo_groupPeriodChilds
getPeriodPointers ToParents g = g ^. phylo_groupPeriodParents
getPeriodPointers ToChildsMemory _ = undefined
getPeriodPointers ToParentsMemory _ = undefined
333
-- | Does the local value @local@ reach the threshold @thr@?
--
-- Implemented for @WeightedLogJaccard@ and @WeightedLogSim@ (both test
-- @local >= thr@); @Hamming@ is not implemented ('undefined').
filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity (WeightedLogJaccard _ _) thr local = local >= thr
filterProximity (WeightedLogSim _ _) thr local = local >= thr
filterProximity (Hamming _ _) _ _ = undefined
340
-- | Display name of a proximity measure.
getProximityName :: Proximity -> String
getProximityName (WeightedLogJaccard _ _) = "WLJaccard"
getProximityName (WeightedLogSim _ _) = "WeightedLogSim"
getProximityName (Hamming _ _) = "Hamming"
347
348 ---------------
349 -- | Phylo | --
350 ---------------
351
-- | Overwrite one of the pointer lists of the group @g@ with @pointers@.
--
-- The pointer type selects between the period fields (@TemporalPointer@) and
-- the scale fields (@ScalePointer@); the filiation selects between childs and
-- parents.  The memory filiations are not implemented ('undefined').
addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers fil pty pointers g =
  case (pty, fil) of
    (TemporalPointer, ToChilds) -> g & phylo_groupPeriodChilds .~ pointers
    (TemporalPointer, ToParents) -> g & phylo_groupPeriodParents .~ pointers
    (TemporalPointer, ToChildsMemory) -> undefined
    (TemporalPointer, ToParentsMemory) -> undefined
    (ScalePointer, ToChilds) -> g & phylo_groupScaleChilds .~ pointers
    (ScalePointer, ToParents) -> g & phylo_groupScaleParents .~ pointers
    (ScalePointer, ToChildsMemory) -> undefined
    (ScalePointer, ToParentsMemory) -> undefined
365
-- | Attach the threshold @thr@ to a pointer: the first component is kept and
-- the second one is paired with @thr@ as @(thr, second)@.
toPointer' :: Double -> Pointer -> Pointer'
toPointer' thr pt = (target, (thr, weight))
  where
    (target, weight) = pt
368
369
-- | Append @pointers@, each stamped with the threshold @thr@ by @toPointer'@,
-- to one of the memory pointer lists of the group @g@.
--
-- Only temporal pointers with a memory filiation are handled
-- (@ToChildsMemory@ / @ToParentsMemory@); every other combination is
-- 'undefined'.
addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
addMemoryPointers fil pty thr pointers g =
  case (pty, fil) of
    (TemporalPointer, ToChilds) -> undefined
    (TemporalPointer, ToParents) -> undefined
    (TemporalPointer, ToChildsMemory) ->
      g & phylo_groupPeriodMemoryChilds .~ ((g ^. phylo_groupPeriodMemoryChilds) ++ stamped)
    (TemporalPointer, ToParentsMemory) ->
      g & phylo_groupPeriodMemoryParents .~ ((g ^. phylo_groupPeriodMemoryParents) ++ stamped)
    (ScalePointer, _) -> undefined
  where
    stamped = map (toPointer' thr) pointers
379
380
-- | Keys of the periods map of @phylo@, sorted on their first component.
getPeriodIds :: Phylo -> [(Date,Date)]
getPeriodIds phylo = sortOn fst (keys (phylo ^. phylo_periods))
385
-- | Identifier held by the first scale-parent pointer of the group @g@ (the
-- pointer is extracted with @head'@).
getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst firstParent
  where
    firstParent = head' "getLevelParentId" (g ^. phylo_groupScaleParents)
388
-- | Last element of the list returned by @getLevels@ (extracted with
-- @last'@).
getLastLevel :: Phylo -> Scale
getLastLevel phylo = last' "lastLevel" levels
  where
    levels = getLevels phylo
391
-- | Distinct scales present in @phylo@: the second component of every key
-- found in the scale maps of its periods, deduplicated with 'nub'.
getLevels :: Phylo -> [Scale]
getLevels phylo = nub [scale | (_, scale) <- keys scales]
  where
    -- Viewing through 'traverse' combines the per-period maps into one.
    scales = phylo ^. phylo_periods . traverse . phylo_periodScales
398
-- | Sea elevation setting stored in the configuration of @phylo@.
getSeaElevation :: Phylo -> SeaElevation
getSeaElevation = seaElevation . getConfig
401
402
-- | Starting value of the sea rise: the first field of a @Constante@
-- elevation, and @0@ for an @Adaptative@ one.
getPhyloSeaRiseStart :: Phylo -> Double
getPhyloSeaRiseStart phylo = startOf (getSeaElevation phylo)
  where
    startOf (Constante s _) = s
    startOf (Adaptative _) = 0
407
-- | Step value of the sea rise: the second field of a @Constante@ elevation,
-- or the single field of an @Adaptative@ one.
getPhyloSeaRiseSteps :: Phylo -> Double
getPhyloSeaRiseSteps phylo = stepsOf (getSeaElevation phylo)
  where
    stepsOf (Constante _ s) = s
    stepsOf (Adaptative s) = s
412
413
-- | Configuration stored in the parameters of @phylo@.
getConfig :: Phylo -> PhyloConfig
getConfig phylo = phylo ^. phylo_param . phyloParam_config
416
417
-- | Replace the configuration stored in the parameters of @phylo@, leaving
-- the version and software fields of the parameters as they are.
setConfig :: PhyloConfig -> Phylo -> Phylo
setConfig config phylo = phylo & phylo_param . phyloParam_config .~ config
424
425 -- & phylo_param & phyloParam_config & phyloParam_config .~ config
426
427
-- | Roots of the foundations of @phylo@.
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = phylo ^. phylo_foundations . foundations_roots
430
-- | Vector held in the @_sources@ field of the sources of @phylo@.
getSources :: Phylo -> Vector Text
getSources phylo = _sources sources
  where
    sources = phylo ^. phylo_sources
433
-- | Groups of the last level of @phylo@ (the last element of @getLevels@),
-- gathered into one list per branch id.
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo =
  elems (fromListWith (++) [(g ^. phylo_groupBranchId, [g]) | g <- lastGroups])
  where
    lastLevel = last' "byBranches" (getLevels phylo)
    lastGroups = getGroupsFromLevel lastLevel phylo
439
-- | All the groups of @phylo@ stored at the scale @lvl@, whatever their
-- period.
--
-- The group maps of every matching scale are combined by the view through
-- the traversal, and the values of the combined map are returned.
getGroupsFromLevel :: Scale -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo =
  elems
    ( phylo
        ^. phylo_periods
          . traverse
          . phylo_periodScales
          . traverse
          . filtered ((== lvl) . view phylo_scaleScale)
          . phylo_scaleGroups
    )
448
449
-- | All the groups of @phylo@ stored at the scale @lvl@ and belonging to one
-- of the given @periods@.
--
-- The group maps of every matching scale are combined by the view through
-- the traversal, and the values of the combined map are returned.
getGroupsFromLevelPeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo =
  elems
    ( phylo
        ^. phylo_periods
          . traverse
          . filtered (\prd -> (prd ^. phylo_periodPeriod) `elem` periods)
          . phylo_periodScales
          . traverse
          . filtered ((== lvl) . view phylo_scaleScale)
          . phylo_scaleGroups
    )
459
460
-- | All the groups stored at the scale @lvl@ in the given map of periods.
--
-- The group maps of every matching scale are combined by the view through
-- the traversal, and the values of the combined map are returned.
getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods =
  elems
    ( periods
        ^. traverse
          . phylo_periodScales
          . traverse
          . filtered ((== lvl) . view phylo_scaleScale)
          . phylo_scaleGroups
    )
468
469
-- | Replace, at the scale @lvl@, every group of @phylo@ whose identifier is a
-- key of @m@ by the group stored under that key; the other groups are kept
-- as they are.
updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = over groupsAtLevel replace phylo
  where
    groupsAtLevel =
      phylo_periods
        . traverse
        . phylo_periodScales
        . traverse
        . filtered ((== lvl) . view phylo_scaleScale)
        . phylo_scaleGroups
        . traverse

    -- The group itself is the fallback when its id is not in the map.
    replace g = Map.findWithDefault g (getGroupId g) m
485
-- | Write, on every period of @phylo@ and on each of its scales, the textual
-- bounds found in @periods'@ under the key of that period.
--
-- The lookup uses '!', so every period of @phylo@ is expected to be a key of
-- @periods'@.
updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
updatePeriods periods' phylo = over (phylo_periods . traverse) relabel phylo
  where
    relabel prd =
      prd
        & phylo_periodPeriodStr .~ label
        & phylo_periodScales .~ scales
      where
        label = periods' ! (prd ^. phylo_periodPeriod)
        scales = map (phylo_scalePeriodStr .~ label) (prd ^. phylo_periodScales)
495
-- | Store @quality@ in the @_phylo_quality@ field of @phylo@.
updateQuality :: Double -> Phylo -> Phylo
updateQuality quality phylo =
  phylo
    { _phylo_quality = quality
    }
498
499
-- | Return @phylo@ unchanged while tracing the number of groups and of
-- distinct branch ids found at the scale @lvl@.
traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = trace message phylo
  where
    groups = getGroupsFromLevel lvl phylo

    message :: String
    message =
      "\n" <> "-- | End of phylo making at level " <> show lvl <> " with "
        <> show (length groups) <> " groups and "
        <> show (length (nub (map _phylo_groupBranchId groups))) <> " branches" <> "\n"
505
506 --------------------
507 -- | Clustering | --
508 --------------------
509
-- | Choose a single branch id among @ids@: keep the ids that occur most
-- often, sort them and return the first one (extracted with @head'@).
mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = head' "mergeBranchIds" (sort (mostFreq' ids))
  where
    -- 2) find the most Up Left ids in the hierarchy of similarity
    -- mostUpLeft :: [[Int]] -> [[Int]]
    -- mostUpLeft ids' =
    --   let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
    --       inf = (fst . minimum) groupIds
    --   in map snd $ filter (\gIds -> fst gIds == inf) groupIds

    -- 1) find the most frequent ids
    mostFreq' :: [[Int]] -> [[Int]]
    mostFreq' ids' = [ident | (count, ident) <- counted, count == top]
      where
        -- One (number of occurrences, id) pair per distinct id.
        counted = [(length run, head' "gIds" run) | run <- group (sort ids')]
        top = fst (maximum counted)
525
526
-- | Copy the @breaks@ and @seaLevels@ meta entries of the first group of
-- @groups@ whose branch id has @bId@ as second component.
--
-- The group is extracted with @head'@ and the entries are read with '!'.
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups = fromList [(key, meta ! key) | key <- ["breaks", "seaLevels"]]
  where
    ego = head' "mergeMeta" [g | g <- groups, snd (g ^. phylo_groupBranchId) == bId]
    meta = ego ^. phylo_groupMeta
531
532
-- | Split the groups into branches and give every member of a branch the
-- same branch id.
--
-- Each group is described by its own id followed by the ids of its period
-- parents, period childs and ancestors; @relatedComponents@ merges the
-- descriptions that share an id.  Within each component the second part of
-- the branch id is replaced by the id elected by @mergeBranchIds@.
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups = map rebrand components
  where
    {- run the related component algorithm -}
    components = relatedComponents [neighbourhood g | g <- elems groups]

    neighbourhood g =
      [getGroupId g]
        ++ map fst (g ^. phylo_groupPeriodParents)
        ++ map fst (g ^. phylo_groupPeriodChilds)
        ++ map fst (g ^. phylo_groupAncestors)

    {- update each group's branch id -}
    rebrand ids =
      let members = elems (restrictKeys groups (Set.fromList ids))
          bId = mergeBranchIds [snd (m ^. phylo_groupBranchId) | m <- members]
       in [m & phylo_groupBranchId %~ (\(lvl, _) -> (lvl, bId)) | m <- members]
546
547
-- | Merge the lists of @graph@ that share at least one element.
--
-- The lists are absorbed one at a time, left to right.  The very first list
-- becomes the first branch as it is.  Every following list is merged with all
-- the branches it is not disjoint from: those branches and the list are
-- concatenated, deduplicated with 'nub' and appended after the untouched
-- branches.
--
-- The set of the incoming list is built once per step instead of once per
-- existing branch inside the partition predicate.
relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' absorb [] graph
  where
    absorb [] groups = [groups]
    absorb branches groups =
      let members = Set.fromList groups
          (apart, touching) =
            partition (\branch -> disjoint (Set.fromList branch) members) branches
       in apart ++ [nub (concat (touching ++ [groups]))]
555
556
-- | Cluster @nodes@ according to @edges@: the two ends of every edge, plus
-- every node on its own, are handed to @relatedComponents@ as lists of group
-- ids, and the resulting ids are resolved back to groups.
--
-- The resolution uses '!' on a map built from @nodes@, so both ends of every
-- edge are expected to be part of @nodes@.
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges = [[ref ! gId | gId <- cluster] | cluster <- clusters]
  where
    ref = fromList [(getGroupId g, g) | g <- nodes]
    linked = [[getGroupId g, getGroupId g'] | ((g, g'), _) <- edges]
    alone = [[getGroupId g] | g <- nodes]
    clusters = relatedComponents (linked ++ alone)
562
563
-- | Return @phylo@ unchanged while tracing the end of the synchronic
-- clustering: the last level, its number of groups and its number of
-- distinct branch ids.
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = trace message phylo
  where
    lastLevel = getLastLevel phylo
    groups = getGroupsFromLevel lastLevel phylo

    message :: String
    message =
      "-- | End synchronic clustering at level " <> show lastLevel
        <> " with " <> show (length groups) <> " groups"
        <> " and " <> show (length (nub (map _phylo_groupBranchId groups))) <> " branches"
        <> "\n"
570
-- | Return @phylo@ unchanged while tracing the start of the synchronic
-- clustering: the last level, its number of groups and its number of
-- distinct branch ids.
traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = trace message phylo
  where
    lastLevel = getLastLevel phylo
    groups = getGroupsFromLevel lastLevel phylo

    message :: String
    message =
      "\n" <> "-- | Start synchronic clustering at level " <> show lastLevel
        <> " with " <> show (length groups) <> " groups"
        <> " and " <> show (length (nub (map _phylo_groupBranchId groups))) <> " branches"
        <> "\n"
577
578
579 -------------------
580 -- | Proximity | --
581 -------------------
582
-- | First field of a @WeightedLogJaccard@ or @WeightedLogSim@ proximity (the
-- sensibility, according to this function's name); @Hamming@ is not
-- implemented ('undefined').
getSensibility :: Proximity -> Double
getSensibility (WeightedLogJaccard s _) = s
getSensibility (WeightedLogSim s _) = s
getSensibility (Hamming _ _) = undefined
588
-- | Second field of a @WeightedLogJaccard@ or @WeightedLogSim@ proximity (the
-- minimum number of shared ngrams, according to this function's name);
-- @Hamming@ is not implemented ('undefined').
getMinSharedNgrams :: Proximity -> Int
getMinSharedNgrams (WeightedLogJaccard _ m) = m
getMinSharedNgrams (WeightedLogSim _ m) = m
getMinSharedNgrams (Hamming _ _) = undefined
594
595 ----------------
596 -- | Branch | --
597 ----------------
598
-- | Extend @acc@ with the longest common prefix of @lst@ and @lst'@.
--
-- Both lists are walked in lockstep for as long as their heads are equal; the
-- matching elements are appended to @acc@ in their original order.  The walk
-- stops at the first mismatch or as soon as either list is exhausted.
--
-- The shared prefix is built first and appended to @acc@ once, instead of
-- growing the accumulator with @acc ++ [x]@ at every step (which is quadratic
-- in the length of the prefix).  Pattern matching also removes the need for
-- the partial 'tail'.
intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
intersectInit acc lst lst' = acc ++ shared lst lst'
  where
    shared (x : xs) (y : ys)
      | x == y = x : shared xs ys
    shared _ _ = []
606
-- | Threshold associated with a pair of branch ids: @thrInit@ plus @thrStep@
-- for every element of the common prefix (given by @intersectInit@) of the
-- second components of the two ids.
branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToProximity bId bId' thrInit thrStep = thrInit + thrStep * fromIntegral depth
  where
    depth = length (intersectInit [] (snd bId) (snd bId'))
609
-- | Distinct ngrams carried by the groups of the given branches, in order of
-- first appearance (deduplicated with 'nub').
--
-- The ngrams are gathered with one comprehension instead of a left fold that
-- re-appends to an ever longer accumulator (@acc ++ xs@), which is quadratic
-- in the total number of ngrams.
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches =
  nub [ngram | g <- concat branches, ngram <- g ^. phylo_groupNgrams]
612
613
-- | Return @nextBranches@ unchanged while tracing a successful split: the
-- branch id prefix of the very first group, the number of branches and
-- groups, the size of every sub-branch list, and the threshold with the two
-- quality values.
--
-- The first group is reached through three nested @head'@ calls.
traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches = trace message nextBranches
  where
    firstGroup = head' "trace" (head' "trace" (head' "trace" nextBranches))

    -- Branch id path of the first group without its last two elements.
    path = (init . init . snd) (firstGroup ^. phylo_groupBranchId)

    message :: String
    message =
      "\n" <> "-- local branches : " <> init (show path)
        <> ",(1.." <> show (length nextBranches) <> ")]"
        <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
        <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
        <> " - for the local threshold " <> show thr <> " ( quality : " <> show qua <> " < " <> show qua' <> ")\n"
622
623
-- | Return @branches@ unchanged while tracing a failed split: the branch id
-- prefix of the very first group, the number of branches and groups, and the
-- threshold with the two quality values.
--
-- The first group is reached through two nested @head'@ calls.
traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchFailure thr qua qua' branches = trace message branches
  where
    firstGroup = head' "trace" (head' "trace" branches)

    -- Branch id path of the first group without its last element.
    path = (init . snd) (firstGroup ^. phylo_groupBranchId)

    message :: String
    message =
      "\n" <> "-- local branches : " <> init (show path)
        <> ",(1.." <> show (length branches) <> ")]"
        <> " | " <> show (length (concat branches)) <> " groups" <> "\n"
        <> " - split with failure for the local threshold " <> show thr <> " ( quality : " <> show qua <> " > " <> show qua' <> ")\n"
631
632
-- | Return @branches@ unchanged while tracing that they could not be split
-- further: the branch id prefix of the very first group, then the number of
-- branches and groups.
--
-- The first group is reached through two nested @head'@ calls.
traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchNoSplit branches = trace message branches
  where
    firstGroup = head' "trace" (head' "trace" branches)

    -- Branch id path of the first group without its last element.
    path = (init . snd) (firstGroup ^. phylo_groupBranchId)

    message :: String
    message =
      "\n" <> "-- local branches : " <> init (show path)
        <> ",(1.." <> show (length branches) <> ")]"
        <> " | " <> show (length (concat branches)) <> " groups" <> "\n"
        <> " - unable to split in smaller branches" <> "\n"
640
641
-- | Return @branches@ unchanged while tracing that the threshold cannot be
-- raised any further: the branch id prefix of the very first group, then the
-- number of branches and groups.
--
-- The first group is reached through two nested @head'@ calls.
traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchLimit branches = trace message branches
  where
    firstGroup = head' "trace" (head' "trace" branches)

    -- Branch id path of the first group without its last element.
    path = (init . snd) (firstGroup ^. phylo_groupBranchId)

    message :: String
    message =
      "\n" <> "-- local branches : " <> init (show path)
        <> ",(1.." <> show (length branches) <> ")]"
        <> " | " <> show (length (concat branches)) <> " groups" <> "\n"
        <> " - unable to increase the threshold above 1" <> "\n"
649
650
-- | Return @groups@ unchanged while tracing the end of the temporal matching:
-- the number of distinct branch ids and the number of groups.
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceMatchEnd groups = trace message groups
  where
    branchIds = nub [g ^. phylo_groupBranchId | g <- groups]

    message :: String
    message =
      "\n" <> "-- | End temporal matching with " <> show (length branchIds)
        <> " branches and " <> show (length groups) <> " groups" <> "\n"
655
656
-- | Return @groups@ unchanged while tracing the start of the temporal
-- matching together with the number of groups.
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups = trace message groups
  where
    message :: String
    message = "\n" <> "-- | Start temporal matching for " <> show (length groups) <> " groups" <> "\n"
660
661
-- | Return the proximity map @m@ unchanged while tracing its number of
-- entries.
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m = trace message m
  where
    message :: String
    message = "\n" <> "-- | " <> show (Map.size m) <> " computed pairs of groups proximity" <> "\n"