]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
refactoring temporal matching
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE ViewPatterns #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloTools where
14
15 import Control.Lens hiding (Level)
16 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
17 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
18 import Data.Set (Set, disjoint)
19 import Data.String (String)
20 import Data.Text (Text,unpack)
21 import Data.Vector (Vector, elemIndex)
22 import Debug.Trace (trace)
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Prelude
25 import Prelude (floor,read)
26 import Text.Printf
27 import qualified Data.List as List
28 import qualified Data.Set as Set
29 import qualified Data.Text as Text
30 import qualified Data.Vector as Vector
31
32 ------------
33 -- | Io | --
34 ------------
35
36 -- | To print an important message as an IO()
37 printIOMsg :: String -> IO ()
38 printIOMsg msg =
39 putStrLn ( "\n"
40 <> "------------"
41 <> "\n"
42 <> "-- | " <> msg <> "\n" )
43
44
45 -- | To print a comment as an IO()
46 printIOComment :: String -> IO ()
47 printIOComment cmt =
48 putStrLn ( "\n" <> cmt <> "\n" )
49
50
51 --------------
52 -- | Misc | --
53 --------------
54
55 -- truncate' :: Double -> Int -> Double
56 -- truncate' x n = (fromIntegral (floor (x * t))) / t
57 -- where t = 10^n
58
59 truncate' :: Double -> Int -> Double
60 truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
61 where
62 --------------
63 t :: Double
64 t = 10 ^n
65
66 getInMap :: Int -> Map Int Double -> Double
67 getInMap k m =
68 if (member k m)
69 then m ! k
70 else 0
71
72 roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
73 roundToStr = printf "%0.*f"
74
75
76 countSup :: Double -> [Double] -> Int
77 countSup s l = length $ filter (>s) l
78
79
80 dropByIdx :: Int -> [a] -> [a]
81 dropByIdx k l = take k l ++ drop (k+1) l
82
83
84 elemIndex' :: Eq a => a -> [a] -> Int
85 elemIndex' e l = case (List.elemIndex e l) of
86 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
87 Just i -> i
88
89
90 commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
91 commonPrefix lst lst' acc =
92 if (null lst || null lst')
93 then acc
94 else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
95 then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
96 else acc
97
98
99 ---------------------
100 -- | Foundations | --
101 ---------------------
102
103
104 -- | Is this Ngrams a Foundations Root ?
105 isRoots :: Ngrams -> Vector Ngrams -> Bool
106 isRoots n ns = Vector.elem n ns
107
108 -- | To transform a list of nrams into a list of foundation's index
109 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
110 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
111
112 -- | To transform a list of sources into a list of sources' index
113 sourcesToIdx :: [Text] -> Vector Text -> [Int]
114 sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
115
116 -- | To transform a list of Ngrams Indexes into a Label
117 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
118 ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
119
120 idxToLabel :: [Int] -> String
121 idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
122
123 idxToLabel' :: [Double] -> String
124 idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
125
126 -- | To transform a list of Ngrams Indexes into a list of Text
127 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
128 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
129
130
131 --------------
132 -- | Time | --
133 --------------
134
135 -- | To transform a list of periods into a set of Dates
136 periodsToYears :: [(Date,Date)] -> Set Date
137 periodsToYears periods = (Set.fromList . sort . concat)
138 $ map (\(d,d') -> [d..d']) periods
139
140
141 findBounds :: [Date] -> (Date,Date)
142 findBounds dates =
143 let dates' = sort dates
144 in (head' "findBounds" dates', last' "findBounds" dates')
145
146
147 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
148 toPeriods dates p s =
149 let (start,end) = findBounds dates
150 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
151 $ chunkAlong p s [start .. end]
152
153
154 toFstDate :: [Text] -> Text
155 toFstDate ds = snd
156 $ head' "firstDate"
157 $ sortOn fst
158 $ map (\d ->
159 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
160 in (d',d)) ds
161
162 toLstDate :: [Text] -> Text
163 toLstDate ds = snd
164 $ head' "firstDate"
165 $ reverse
166 $ sortOn fst
167 $ map (\d ->
168 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
169 in (d',d)) ds
170
171
172 getTimeScale :: Phylo -> [Char]
173 getTimeScale p = case (timeUnit $ getConfig p) of
174 Epoch _ _ _ -> "epoch"
175 Year _ _ _ -> "year"
176 Month _ _ _ -> "month"
177 Week _ _ _ -> "week"
178 Day _ _ _ -> "day"
179
180
181 -- | Get a regular & ascendante timeScale from a given list of dates
182 toTimeScale :: [Date] -> Int -> [Date]
183 toTimeScale dates step =
184 let (start,end) = findBounds dates
185 in [start, (start + step) .. end]
186
187
188 getTimeStep :: TimeUnit -> Int
189 getTimeStep time = case time of
190 Epoch _ s _ -> s
191 Year _ s _ -> s
192 Month _ s _ -> s
193 Week _ s _ -> s
194 Day _ s _ -> s
195
196 getTimePeriod :: TimeUnit -> Int
197 getTimePeriod time = case time of
198 Epoch p _ _ -> p
199 Year p _ _ -> p
200 Month p _ _ -> p
201 Week p _ _ -> p
202 Day p _ _ -> p
203
204 getTimeFrame :: TimeUnit -> Int
205 getTimeFrame time = case time of
206 Epoch _ _ f -> f
207 Year _ _ f -> f
208 Month _ _ f -> f
209 Week _ _ f -> f
210 Day _ _ f -> f
211
212 -------------
213 -- | Fis | --
214 -------------
215
216
217 -- | To find if l' is nested in l
218 isNested :: Eq a => [a] -> [a] -> Bool
219 isNested l l'
220 | null l' = True
221 | length l' > length l = False
222 | (union l l') == l = True
223 | otherwise = False
224
225
226 -- | To filter Fis with small Support but by keeping non empty Periods
227 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
228 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
229 then keepFilled f (thr - 1) l
230 else f thr l
231
232
233 traceClique :: Map (Date, Date) [Clustering] -> String
234 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
235 where
236 --------------------------------------
237 cliques :: [Double]
238 cliques = sort $ map (fromIntegral . length . _clustering_roots) $ concat $ elems mFis
239 --------------------------------------
240
241
242 traceSupport :: Map (Date, Date) [Clustering] -> String
243 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
244 where
245 --------------------------------------
246 supports :: [Double]
247 supports = sort $ map (fromIntegral . _clustering_support) $ concat $ elems mFis
248 --------------------------------------
249
250
251 traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
252 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
253 <> "Support : " <> (traceSupport mFis) <> "\n"
254 <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
255
256
257 ----------------
258 -- | Cluster| --
259 ----------------
260
261
262 getCliqueSupport :: Cluster -> Int
263 getCliqueSupport unit = case unit of
264 Fis s _ -> s
265 MaxClique _ _ _ -> 0
266
267 getCliqueSize :: Cluster -> Int
268 getCliqueSize unit = case unit of
269 Fis _ s -> s
270 MaxClique s _ _ -> s
271
272
273 --------------
274 -- | Cooc | --
275 --------------
276
277 listToCombi' :: [a] -> [(a,a)]
278 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
279
280 listToEqual' :: Eq a => [a] -> [(a,a)]
281 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
282
283 listToKeys :: Eq a => [a] -> [(a,a)]
284 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
285
286 listToMatrix :: [Int] -> Map (Int,Int) Double
287 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
288
289 listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
290 listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
291
292 listToSeq :: Eq a => [a] -> [(a,a)]
293 listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
294
295 sumCooc :: Cooc -> Cooc -> Cooc
296 sumCooc cooc cooc' = unionWith (+) cooc cooc'
297
298 getTrace :: Cooc -> Double
299 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
300
301 coocToDiago :: Cooc -> Cooc
302 coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
303
304 -- | To build the local cooc matrix of each phylogroup
305 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
306 ngramsToCooc ngrams coocs =
307 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
308 pairs = listToKeys ngrams
309 in filterWithKey (\k _ -> elem k pairs) cooc
310
311
312 --------------------
313 -- | PhyloGroup | --
314 --------------------
315
316 getGroupId :: PhyloGroup -> PhyloGroupId
317 getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex)
318
319 idToPrd :: PhyloGroupId -> Period
320 idToPrd id = (fst . fst) id
321
322 groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
323 groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
324
325 getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
326 getPeriodPointers fil g =
327 case fil of
328 ToChilds -> g ^. phylo_groupPeriodChilds
329 ToParents -> g ^. phylo_groupPeriodParents
330 ToChildsMemory -> undefined
331 ToParentsMemory -> undefined
332
333 filterProximity :: Proximity -> Double -> Double -> Bool
334 filterProximity proximity thr local =
335 case proximity of
336 WeightedLogJaccard _ _ -> local >= thr
337 WeightedLogSim _ _ -> local >= thr
338 Hamming _ _ -> undefined
339
340 getProximityName :: Proximity -> String
341 getProximityName proximity =
342 case proximity of
343 WeightedLogJaccard _ _ -> "WLJaccard"
344 WeightedLogSim _ _ -> "WeightedLogSim"
345 Hamming _ _ -> "Hamming"
346
347 ---------------
348 -- | Phylo | --
349 ---------------
350
351 addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
352 addPointers fil pty pointers g =
353 case pty of
354 TemporalPointer -> case fil of
355 ToChilds -> g & phylo_groupPeriodChilds .~ pointers
356 ToParents -> g & phylo_groupPeriodParents .~ pointers
357 ToChildsMemory -> undefined
358 ToParentsMemory -> undefined
359 ScalePointer -> case fil of
360 ToChilds -> g & phylo_groupScaleChilds .~ pointers
361 ToParents -> g & phylo_groupScaleParents .~ pointers
362 ToChildsMemory -> undefined
363 ToParentsMemory -> undefined
364
365 toPointer' :: Double -> Pointer -> Pointer'
366 toPointer' thr pt = (fst pt,(thr,snd pt))
367
368
369 addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
370 addMemoryPointers fil pty thr pointers g =
371 case pty of
372 TemporalPointer -> case fil of
373 ToChilds -> undefined
374 ToParents -> undefined
375 ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
376 ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
377 ScalePointer -> undefined
378
379
380 getPeriodIds :: Phylo -> [(Date,Date)]
381 getPeriodIds phylo = sortOn fst
382 $ keys
383 $ phylo ^. phylo_periods
384
385 getLevelParentId :: PhyloGroup -> PhyloGroupId
386 getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
387
388 getLastLevel :: Phylo -> Scale
389 getLastLevel phylo = last' "lastLevel" $ getScales phylo
390
391 getScales :: Phylo -> [Scale]
392 getScales phylo = nub
393 $ map snd
394 $ keys $ view ( phylo_periods
395 . traverse
396 . phylo_periodScales ) phylo
397
398 getSeaElevation :: Phylo -> SeaElevation
399 getSeaElevation phylo = seaElevation (getConfig phylo)
400
401
402 getPhyloSeaRiseStart :: Phylo -> Double
403 getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
404 Constante s _ -> s
405 Adaptative _ -> 0
406
407 getPhyloSeaRiseSteps :: Phylo -> Double
408 getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
409 Constante _ s -> s
410 Adaptative s -> s
411
412
413 getConfig :: Phylo -> PhyloConfig
414 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
415
416
417 setConfig :: PhyloConfig -> Phylo -> Phylo
418 setConfig config phylo = phylo
419 & phylo_param .~ (PhyloParam
420 ((phylo ^. phylo_param) ^. phyloParam_version)
421 ((phylo ^. phylo_param) ^. phyloParam_software)
422 config)
423
424 -- & phylo_param & phyloParam_config & phyloParam_config .~ config
425
426
427 getRoots :: Phylo -> Vector Ngrams
428 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
429
430 getSources :: Phylo -> Vector Text
431 getSources phylo = _sources (phylo ^. phylo_sources)
432
433
434 -- get the groups distributed by branches at the last scale
435 phyloLastScale :: Phylo -> [[PhyloGroup]]
436 phyloLastScale phylo = elems
437 $ fromListWith (++)
438 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
439 $ getGroupsFromScale (last' "byBranches" $ getScales phylo) phylo
440
441 getGroupsFromScale :: Scale -> Phylo -> [PhyloGroup]
442 getGroupsFromScale lvl phylo =
443 elems $ view ( phylo_periods
444 . traverse
445 . phylo_periodScales
446 . traverse
447 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
448 . phylo_scaleGroups ) phylo
449
450
451 getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
452 getGroupsFromScalePeriods lvl periods phylo =
453 elems $ view ( phylo_periods
454 . traverse
455 . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
456 . phylo_periodScales
457 . traverse
458 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
459 . phylo_scaleGroups ) phylo
460
461
462 getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
463 getGroupsFromPeriods lvl periods =
464 elems $ view ( traverse
465 . phylo_periodScales
466 . traverse
467 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
468 . phylo_scaleGroups ) periods
469
470
471 updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
472 updatePhyloGroups lvl m phylo =
473 over ( phylo_periods
474 . traverse
475 . phylo_periodScales
476 . traverse
477 . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
478 . phylo_scaleGroups
479 . traverse
480 ) (\g ->
481 let id = getGroupId g
482 in
483 if member id m
484 then m ! id
485 else g ) phylo
486
487 updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
488 updatePeriods periods' phylo =
489 over (phylo_periods . traverse)
490 (\prd ->
491 let prd' = periods' ! (prd ^. phylo_periodPeriod)
492 lvls = map (\lvl -> lvl & phylo_scalePeriodStr .~ prd') $ prd ^. phylo_periodScales
493 in prd & phylo_periodPeriodStr .~ prd'
494 & phylo_periodScales .~ lvls
495 ) phylo
496
497 updateQuality :: Double -> Phylo -> Phylo
498 updateQuality quality phylo = phylo { _phylo_quality = quality }
499
500
501 traceToPhylo :: Scale -> Phylo -> Phylo
502 traceToPhylo lvl phylo =
503 trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
504 <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
505 <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
506
507 --------------------
508 -- | Clustering | --
509 --------------------
510
511 mergeBranchIds :: [[Int]] -> [Int]
512 mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
513 where
514 -- | 2) find the most Up Left ids in the hierarchy of similarity
515 -- mostUpLeft :: [[Int]] -> [[Int]]
516 -- mostUpLeft ids' =
517 -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
518 -- inf = (fst . minimum) groupIds
519 -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
520 -- | 1) find the most frequent ids
521 mostFreq' :: [[Int]] -> [[Int]]
522 mostFreq' ids' =
523 let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
524 sup = (fst . maximum) groupIds
525 in map snd $ filter (\gIds -> fst gIds == sup) groupIds
526
527
528 mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
529 mergeMeta bId groups =
530 let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
531 in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
532
533
534 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
535 groupsToBranches' groups =
536 {- run the related component algorithm -}
537 let egos = map (\g -> [getGroupId g]
538 ++ (map fst $ g ^. phylo_groupPeriodParents)
539 ++ (map fst $ g ^. phylo_groupPeriodChilds)
540 ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
541 graph = relatedComponents egos
542 {- update each group's branch id -}
543 in map (\ids ->
544 let groups' = elems $ restrictKeys groups (Set.fromList ids)
545 bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
546 in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
547
548
549 relatedComponents :: Ord a => [[a]] -> [[a]]
550 relatedComponents graph = foldl' (\branches groups ->
551 if (null branches)
552 then branches ++ [groups]
553 else
554 let branchPart = partition (\branch -> disjoint (Set.fromList branch) (Set.fromList groups)) branches
555 in (fst branchPart) ++ [nub $ concat $ (snd branchPart) ++ [groups]]) [] graph
556
557
558 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
559 toRelatedComponents nodes edges =
560 let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
561 clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
562 in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
563
564
565 traceSynchronyEnd :: Phylo -> Phylo
566 traceSynchronyEnd phylo =
567 trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
568 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
569 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
570 <> "\n" ) phylo
571
572 traceSynchronyStart :: Phylo -> Phylo
573 traceSynchronyStart phylo =
574 trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
575 <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
576 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
577 <> "\n" ) phylo
578
579
580 -------------------
581 -- | Proximity | --
582 -------------------
583
584 getSensibility :: Proximity -> Double
585 getSensibility proxi = case proxi of
586 WeightedLogJaccard s _ -> s
587 WeightedLogSim s _ -> s
588 Hamming _ _ -> undefined
589
590 getMinSharedNgrams :: Proximity -> Int
591 getMinSharedNgrams proxi = case proxi of
592 WeightedLogJaccard _ m -> m
593 WeightedLogSim _ m -> m
594 Hamming _ _ -> undefined
595
596 ----------------
597 -- | Branch | --
598 ----------------
599
600 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
601 intersectInit acc lst lst' =
602 if (null lst) || (null lst')
603 then acc
604 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
605 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
606 else acc
607
608 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
609 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
610
611 ngramsInBranches :: [[PhyloGroup]] -> [Int]
612 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
613
614
615 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
616 traceMatchSuccess thr qua qua' nextBranches =
617 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
618 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
619 <> ",(1.." <> show (length nextBranches) <> ")]"
620 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
621 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
622 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
623
624
625 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
626 traceMatchFailure thr qua qua' branches =
627 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
628 <> ",(1.." <> show (length branches) <> ")]"
629 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
630 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
631 ) branches
632
633
634 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
635 traceMatchNoSplit branches =
636 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
637 <> ",(1.." <> show (length branches) <> ")]"
638 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
639 <> " - unable to split in smaller branches" <> "\n"
640 ) branches
641
642
643 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
644 traceMatchLimit branches =
645 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
646 <> ",(1.." <> show (length branches) <> ")]"
647 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
648 <> " - unable to increase the threshold above 1" <> "\n"
649 ) branches
650
651
652 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
653 traceMatchEnd groups =
654 trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
655 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups
656
657
658 traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
659 traceTemporalMatching groups =
660 trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
661
662
663 traceGroupsProxi :: [Double] -> [Double]
664 traceGroupsProxi l =
665 trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l