]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
[DEBUG] Message
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE ViewPatterns #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloTools where
14
15 import Control.Lens hiding (Level)
16 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
17 import Data.Map.Strict (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
18 import Data.Set (Set, disjoint)
19 import Data.String (String)
20 import Data.Text (Text,unpack)
21 import Data.Vector (Vector, elemIndex)
22 import Debug.Trace (trace)
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Prelude
25 import Prelude (floor,read)
26 import Text.Printf
27 import qualified Data.List as List
28 import qualified Data.Set as Set
29 import qualified Data.Text as Text
30 import qualified Data.Vector as Vector
31
32 ------------
33 -- | Io | --
34 ------------
35
36 -- | To print an important message as an IO()
37 printIOMsg :: String -> IO ()
38 printIOMsg msg =
39 putStrLn ( "\n"
40 <> "------------"
41 <> "\n"
42 <> "-- | " <> msg <> "\n" )
43
44
45 -- | To print a comment as an IO()
46 printIOComment :: String -> IO ()
47 printIOComment cmt =
48 putStrLn ( "\n" <> cmt <> "\n" )
49
50
51 --------------
52 -- | Misc | --
53 --------------
54
55 -- truncate' :: Double -> Int -> Double
56 -- truncate' x n = (fromIntegral (floor (x * t))) / t
57 -- where t = 10^n
58
59 truncate' :: Double -> Int -> Double
60 truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
61 where
62 --------------
63 t :: Double
64 t = 10 ^n
65
66 getInMap :: Int -> Map Int Double -> Double
67 getInMap k m =
68 if (member k m)
69 then m ! k
70 else 0
71
72 roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
73 roundToStr = printf "%0.*f"
74
75
76 countSup :: Double -> [Double] -> Int
77 countSup s l = length $ filter (>s) l
78
79
80 dropByIdx :: Int -> [a] -> [a]
81 dropByIdx k l = take k l ++ drop (k+1) l
82
83
84 elemIndex' :: Eq a => a -> [a] -> Int
85 elemIndex' e l = case (List.elemIndex e l) of
86 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
87 Just i -> i
88
89
90 commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
91 commonPrefix lst lst' acc =
92 if (null lst || null lst')
93 then acc
94 else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
95 then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
96 else acc
97
98
99 ---------------------
100 -- | Foundations | --
101 ---------------------
102
103
104 -- | Is this Ngrams a Foundations Root ?
105 isRoots :: Ngrams -> Vector Ngrams -> Bool
106 isRoots n ns = Vector.elem n ns
107
108 -- | To transform a list of nrams into a list of foundation's index
109 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
110 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
111
112 -- | To transform a list of sources into a list of sources' index
113 sourcesToIdx :: [Text] -> Vector Text -> [Int]
114 sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
115
116 -- | To transform a list of Ngrams Indexes into a Label
117 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
118 ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
119
120 idxToLabel :: [Int] -> String
121 idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
122
123 idxToLabel' :: [Double] -> String
124 idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
125
126 -- | To transform a list of Ngrams Indexes into a list of Text
127 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
128 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
129
130
131 --------------
132 -- | Time | --
133 --------------
134
135 -- | To transform a list of periods into a set of Dates
136 periodsToYears :: [(Date,Date)] -> Set Date
137 periodsToYears periods = (Set.fromList . sort . concat)
138 $ map (\(d,d') -> [d..d']) periods
139
140
141 findBounds :: [Date] -> (Date,Date)
142 findBounds [] = panic "[G.C.V.P.PhyloTools] empty dates for find bounds"
143 findBounds dates =
144 let dates' = sort dates
145 in (head' "findBounds" dates', last' "findBounds" dates')
146
147
148 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
149 toPeriods dates p s =
150 let (start,end) = findBounds dates
151 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
152 $ chunkAlong p s [start .. end]
153
154
155 toFstDate :: [Text] -> Text
156 toFstDate ds = snd
157 $ head' "firstDate"
158 $ sortOn fst
159 $ map (\d ->
160 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
161 in (d',d)) ds
162
163 toLstDate :: [Text] -> Text
164 toLstDate ds = snd
165 $ head' "firstDate"
166 $ reverse
167 $ sortOn fst
168 $ map (\d ->
169 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
170 in (d',d)) ds
171
172
173 getTimeScale :: Phylo -> [Char]
174 getTimeScale p = case (timeUnit $ getConfig p) of
175 Epoch _ _ _ -> "epoch"
176 Year _ _ _ -> "year"
177 Month _ _ _ -> "month"
178 Week _ _ _ -> "week"
179 Day _ _ _ -> "day"
180
181
182 -- | Get a regular & ascendante timeScale from a given list of dates
183 toTimeScale :: [Date] -> Int -> [Date]
184 toTimeScale dates step =
185 let (start,end) = findBounds dates
186 in [start, (start + step) .. end]
187
188
189 getTimeStep :: TimeUnit -> Int
190 getTimeStep time = case time of
191 Epoch _ s _ -> s
192 Year _ s _ -> s
193 Month _ s _ -> s
194 Week _ s _ -> s
195 Day _ s _ -> s
196
197 getTimePeriod :: TimeUnit -> Int
198 getTimePeriod time = case time of
199 Epoch p _ _ -> p
200 Year p _ _ -> p
201 Month p _ _ -> p
202 Week p _ _ -> p
203 Day p _ _ -> p
204
205 getTimeFrame :: TimeUnit -> Int
206 getTimeFrame time = case time of
207 Epoch _ _ f -> f
208 Year _ _ f -> f
209 Month _ _ f -> f
210 Week _ _ f -> f
211 Day _ _ f -> f
212
213 -------------
214 -- | Fis | --
215 -------------
216
217
218 -- | To find if l' is nested in l
219 isNested :: Eq a => [a] -> [a] -> Bool
220 isNested l l'
221 | null l' = True
222 | length l' > length l = False
223 | (union l l') == l = True
224 | otherwise = False
225
226
227 -- | To filter Fis with small Support but by keeping non empty Periods
228 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
229 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
230 then keepFilled f (thr - 1) l
231 else f thr l
232
233
234 traceClique :: Map (Date, Date) [Clustering] -> String
235 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
236 where
237 --------------------------------------
238 cliques :: [Double]
239 cliques = sort $ map (fromIntegral . length . _clustering_roots) $ concat $ elems mFis
240 --------------------------------------
241
242
243 traceSupport :: Map (Date, Date) [Clustering] -> String
244 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
245 where
246 --------------------------------------
247 supports :: [Double]
248 supports = sort $ map (fromIntegral . _clustering_support) $ concat $ elems mFis
249 --------------------------------------
250
251
252 traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
253 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
254 <> "Support : " <> (traceSupport mFis) <> "\n"
255 <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
256
257
258 ----------------
259 -- | Cluster| --
260 ----------------
261
262
263 getCliqueSupport :: Cluster -> Int
264 getCliqueSupport unit = case unit of
265 Fis s _ -> s
266 MaxClique _ _ _ -> 0
267
268 getCliqueSize :: Cluster -> Int
269 getCliqueSize unit = case unit of
270 Fis _ s -> s
271 MaxClique s _ _ -> s
272
273
274 --------------
275 -- | Cooc | --
276 --------------
277
278 listToCombi' :: [a] -> [(a,a)]
279 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
280
281 listToEqual' :: Eq a => [a] -> [(a,a)]
282 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
283
284 listToKeys :: Eq a => [a] -> [(a,a)]
285 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
286
287 listToMatrix :: [Int] -> Map (Int,Int) Double
288 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
289
290 listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
291 listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
292
293 listToSeq :: Eq a => [a] -> [(a,a)]
294 listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
295
296 sumCooc :: Cooc -> Cooc -> Cooc
297 sumCooc cooc cooc' = unionWith (+) cooc cooc'
298
299 getTrace :: Cooc -> Double
300 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
301
302 coocToDiago :: Cooc -> Cooc
303 coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
304
305 -- | To build the local cooc matrix of each phylogroup
306 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
307 ngramsToCooc ngrams coocs =
308 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
309 pairs = listToKeys ngrams
310 in filterWithKey (\k _ -> elem k pairs) cooc
311
312
313 --------------------
314 -- | PhyloGroup | --
315 --------------------
316
317 getGroupId :: PhyloGroup -> PhyloGroupId
318 getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex)
319
320 idToPrd :: PhyloGroupId -> Period
321 idToPrd id = (fst . fst) id
322
323 groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
324 groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
325
326 getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
327 getPeriodPointers fil g =
328 case fil of
329 ToChilds -> g ^. phylo_groupPeriodChilds
330 ToParents -> g ^. phylo_groupPeriodParents
331 ToChildsMemory -> undefined
332 ToParentsMemory -> undefined
333
334 filterProximity :: Proximity -> Double -> Double -> Bool
335 filterProximity proximity thr local =
336 case proximity of
337 WeightedLogJaccard _ _ -> local >= thr
338 WeightedLogSim _ _ -> local >= thr
339 Hamming _ _ -> undefined
340
341 getProximityName :: Proximity -> String
342 getProximityName proximity =
343 case proximity of
344 WeightedLogJaccard _ _ -> "WLJaccard"
345 WeightedLogSim _ _ -> "WeightedLogSim"
346 Hamming _ _ -> "Hamming"
347
348 ---------------
349 -- | Phylo | --
350 ---------------
351
352 addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
353 addPointers fil pty pointers g =
354 case pty of
355 TemporalPointer -> case fil of
356 ToChilds -> g & phylo_groupPeriodChilds .~ pointers
357 ToParents -> g & phylo_groupPeriodParents .~ pointers
358 ToChildsMemory -> undefined
359 ToParentsMemory -> undefined
360 ScalePointer -> case fil of
361 ToChilds -> g & phylo_groupScaleChilds .~ pointers
362 ToParents -> g & phylo_groupScaleParents .~ pointers
363 ToChildsMemory -> undefined
364 ToParentsMemory -> undefined
365
366 toPointer' :: Double -> Pointer -> Pointer'
367 toPointer' thr pt = (fst pt,(thr,snd pt))
368
369
370 addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
371 addMemoryPointers fil pty thr pointers g =
372 case pty of
373 TemporalPointer -> case fil of
374 ToChilds -> undefined
375 ToParents -> undefined
376 ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
377 ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
378 ScalePointer -> undefined
379
380
381 getPeriodIds :: Phylo -> [(Date,Date)]
382 getPeriodIds phylo = sortOn fst
383 $ keys
384 $ phylo ^. phylo_periods
385
386 getLevelParentId :: PhyloGroup -> PhyloGroupId
387 getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
388
389 getLastLevel :: Phylo -> Scale
390 getLastLevel phylo = last' "lastLevel" $ getScales phylo
391
392 getScales :: Phylo -> [Scale]
393 getScales phylo = nub
394 $ map snd
395 $ keys $ view ( phylo_periods
396 . traverse
397 . phylo_periodScales ) phylo
398
399 getSeaElevation :: Phylo -> SeaElevation
400 getSeaElevation phylo = seaElevation (getConfig phylo)
401
402
403 getPhyloSeaRiseStart :: Phylo -> Double
404 getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
405 Constante s _ -> s
406 Adaptative _ -> 0
407
408 getPhyloSeaRiseSteps :: Phylo -> Double
409 getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
410 Constante _ s -> s
411 Adaptative s -> s
412
413
414 getConfig :: Phylo -> PhyloConfig
415 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
416
417
418 setConfig :: PhyloConfig -> Phylo -> Phylo
419 setConfig config phylo = phylo
420 & phylo_param .~ (PhyloParam
421 ((phylo ^. phylo_param) ^. phyloParam_version)
422 ((phylo ^. phylo_param) ^. phyloParam_software)
423 config)
424
425 -- & phylo_param & phyloParam_config & phyloParam_config .~ config
426
427
428 getRoots :: Phylo -> Vector Ngrams
429 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
430
431 getSources :: Phylo -> Vector Text
432 getSources phylo = _sources (phylo ^. phylo_sources)
433
434
435 -- get the groups distributed by branches at the last scale
436 phyloLastScale :: Phylo -> [[PhyloGroup]]
437 phyloLastScale phylo = elems
438 $ fromListWith (++)
439 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
440 $ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
441
442 getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
443 getGroupsFromScale lvl phylo =
444 elems $ view ( phylo_periods
445 . traverse
446 . phylo_periodScales
447 . traverse
448 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
449 . phylo_scaleGroups ) phylo
450
451
452 getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
453 getGroupsFromScalePeriods lvl periods phylo =
454 elems $ view ( phylo_periods
455 . traverse
456 . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
457 . phylo_periodScales
458 . traverse
459 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
460 . phylo_scaleGroups ) phylo
461
462
463 getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
464 getGroupsFromPeriods lvl periods =
465 elems $ view ( traverse
466 . phylo_periodScales
467 . traverse
468 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
469 . phylo_scaleGroups ) periods
470
471
472 updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
473 updatePhyloGroups lvl m phylo =
474 over ( phylo_periods
475 . traverse
476 . phylo_periodScales
477 . traverse
478 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
479 . phylo_scaleGroups
480 . traverse
481 ) (\g ->
482 let id = getGroupId g
483 in
484 if member id m
485 then m ! id
486 else g ) phylo
487
488 updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
489 updatePeriods periods' phylo =
490 over (phylo_periods . traverse)
491 (\prd ->
492 let prd' = periods' ! (prd ^. phylo_periodPeriod)
493 lvls = map (\lvl -> lvl & phylo_scalePeriodStr .~ prd') $ prd ^. phylo_periodScales
494 in prd & phylo_periodPeriodStr .~ prd'
495 & phylo_periodScales .~ lvls
496 ) phylo
497
498 updateQuality :: Double -> Phylo -> Phylo
499 updateQuality quality phylo = phylo { _phylo_quality = quality }
500
501
502 traceToPhylo :: Scale -> Phylo -> Phylo
503 traceToPhylo lvl phylo =
504 trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
505 <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
506 <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
507
508 --------------------
509 -- | Clustering | --
510 --------------------
511
512 mergeBranchIds :: [[Int]] -> [Int]
513 mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
514 where
515 -- | 2) find the most Up Left ids in the hierarchy of similarity
516 -- mostUpLeft :: [[Int]] -> [[Int]]
517 -- mostUpLeft ids' =
518 -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
519 -- inf = (fst . minimum) groupIds
520 -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
521 -- | 1) find the most frequent ids
522 mostFreq' :: [[Int]] -> [[Int]]
523 mostFreq' ids' =
524 let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
525 sup = (fst . maximum) groupIds
526 in map snd $ filter (\gIds -> fst gIds == sup) groupIds
527
528
529 mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
530 mergeMeta bId groups =
531 let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
532 in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
533
534
535 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
536 groupsToBranches' groups =
537 {- run the related component algorithm -}
538 let egos = map (\g -> [getGroupId g]
539 ++ (map fst $ g ^. phylo_groupPeriodParents)
540 ++ (map fst $ g ^. phylo_groupPeriodChilds)
541 ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
542 graph = relatedComponents egos
543 {- update each group's branch id -}
544 in map (\ids ->
545 let groups' = elems $ restrictKeys groups (Set.fromList ids)
546 bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
547 in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
548
549
550 relatedComponents :: Ord a => [[a]] -> [[a]]
551 relatedComponents graph = foldl' (\branches groups ->
552 if (null branches)
553 then branches ++ [groups]
554 else
555 let branchPart = partition (\branch -> disjoint (Set.fromList branch) (Set.fromList groups)) branches
556 in (fst branchPart) ++ [nub $ concat $ (snd branchPart) ++ [groups]]) [] graph
557
558
559 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
560 toRelatedComponents nodes edges =
561 let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
562 clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
563 in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
564
565
566 traceSynchronyEnd :: Phylo -> Phylo
567 traceSynchronyEnd phylo =
568 trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
569 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
570 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
571 <> "\n" ) phylo
572
573 traceSynchronyStart :: Phylo -> Phylo
574 traceSynchronyStart phylo =
575 trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
576 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
577 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
578 <> "\n" ) phylo
579
580
581 -------------------
582 -- | Proximity | --
583 -------------------
584
585 getSensibility :: Proximity -> Double
586 getSensibility proxi = case proxi of
587 WeightedLogJaccard s _ -> s
588 WeightedLogSim s _ -> s
589 Hamming _ _ -> undefined
590
591 getMinSharedNgrams :: Proximity -> Int
592 getMinSharedNgrams proxi = case proxi of
593 WeightedLogJaccard _ m -> m
594 WeightedLogSim _ m -> m
595 Hamming _ _ -> undefined
596
597 ----------------
598 -- | Branch | --
599 ----------------
600
601 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
602 intersectInit acc lst lst' =
603 if (null lst) || (null lst')
604 then acc
605 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
606 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
607 else acc
608
609 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
610 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
611
612 ngramsInBranches :: [[PhyloGroup]] -> [Int]
613 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
614
615
616 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
617 traceMatchSuccess thr qua qua' nextBranches =
618 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
619 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
620 <> ",(1.." <> show (length nextBranches) <> ")]"
621 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
622 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
623 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
624
625
626 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
627 traceMatchFailure thr qua qua' branches =
628 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
629 <> ",(1.." <> show (length branches) <> ")]"
630 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
631 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
632 ) branches
633
634
635 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
636 traceMatchNoSplit branches =
637 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
638 <> ",(1.." <> show (length branches) <> ")]"
639 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
640 <> " - unable to split in smaller branches" <> "\n"
641 ) branches
642
643
644 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
645 traceMatchLimit branches =
646 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
647 <> ",(1.." <> show (length branches) <> ")]"
648 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
649 <> " - unable to increase the threshold above 1" <> "\n"
650 ) branches
651
652
653 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
654 traceMatchEnd groups =
655 trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
656 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups
657
658
659 traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
660 traceTemporalMatching groups =
661 trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
662
663
664 traceGroupsProxi :: [Double] -> [Double]
665 traceGroupsProxi l =
666 trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l