]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
[text-api] first rewrite using Conduit
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / PhyloTools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.PhyloTools
3 Description : Module dedicated to all the tools needed for making a Phylo
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# LANGUAGE ViewPatterns #-}
12
13 module Gargantext.Core.Viz.Phylo.PhyloTools where
14
15 import Control.Lens hiding (Level)
16 import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
17 import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
18 import Data.Set (Set, disjoint)
19 import Data.String (String)
20 import Data.Text (Text,unpack)
21 import Data.Vector (Vector, elemIndex)
22 import Debug.Trace (trace)
23 import Gargantext.Core.Viz.Phylo
24 import Gargantext.Prelude
25 import Prelude (floor,read)
26 import Text.Printf
27 import qualified Data.List as List
28 import qualified Data.Map as Map
29 import qualified Data.Set as Set
30 import qualified Data.Text as Text
31 import qualified Data.Vector as Vector
32
33 ------------
34 -- | Io | --
35 ------------
36
37 -- | To print an important message as an IO()
38 printIOMsg :: String -> IO ()
39 printIOMsg msg =
40 putStrLn ( "\n"
41 <> "------------"
42 <> "\n"
43 <> "-- | " <> msg <> "\n" )
44
45
46 -- | To print a comment as an IO()
47 printIOComment :: String -> IO ()
48 printIOComment cmt =
49 putStrLn ( "\n" <> cmt <> "\n" )
50
51
52 --------------
53 -- | Misc | --
54 --------------
55
56 -- truncate' :: Double -> Int -> Double
57 -- truncate' x n = (fromIntegral (floor (x * t))) / t
58 -- where t = 10^n
59
60 truncate' :: Double -> Int -> Double
61 truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
62 where
63 --------------
64 t :: Double
65 t = 10 ^n
66
67 getInMap :: Int -> Map Int Double -> Double
68 getInMap k m =
69 if (member k m)
70 then m ! k
71 else 0
72
73 roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
74 roundToStr = printf "%0.*f"
75
76
77 countSup :: Double -> [Double] -> Int
78 countSup s l = length $ filter (>s) l
79
80
81 dropByIdx :: Int -> [a] -> [a]
82 dropByIdx k l = take k l ++ drop (k+1) l
83
84
85 elemIndex' :: Eq a => a -> [a] -> Int
86 elemIndex' e l = case (List.elemIndex e l) of
87 Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
88 Just i -> i
89
90
91 commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
92 commonPrefix lst lst' acc =
93 if (null lst || null lst')
94 then acc
95 else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
96 then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
97 else acc
98
99
100 ---------------------
101 -- | Foundations | --
102 ---------------------
103
104
105 -- | Is this Ngrams a Foundations Root ?
106 isRoots :: Ngrams -> Vector Ngrams -> Bool
107 isRoots n ns = Vector.elem n ns
108
109 -- | To transform a list of nrams into a list of foundation's index
110 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
111 ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
112
113 -- | To transform a list of sources into a list of sources' index
114 sourcesToIdx :: [Text] -> Vector Text -> [Int]
115 sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
116
117 -- | To transform a list of Ngrams Indexes into a Label
118 ngramsToLabel :: Vector Ngrams -> [Int] -> Text
119 ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
120
121 idxToLabel :: [Int] -> String
122 idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
123
124 idxToLabel' :: [Double] -> String
125 idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
126
127 -- | To transform a list of Ngrams Indexes into a list of Text
128 ngramsToText :: Vector Ngrams -> [Int] -> [Text]
129 ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
130
131
132 --------------
133 -- | Time | --
134 --------------
135
136 -- | To transform a list of periods into a set of Dates
137 periodsToYears :: [(Date,Date)] -> Set Date
138 periodsToYears periods = (Set.fromList . sort . concat)
139 $ map (\(d,d') -> [d..d']) periods
140
141
142 findBounds :: [Date] -> (Date,Date)
143 findBounds dates =
144 let dates' = sort dates
145 in (head' "findBounds" dates', last' "findBounds" dates')
146
147
148 toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
149 toPeriods dates p s =
150 let (start,end) = findBounds dates
151 in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
152 $ chunkAlong p s [start .. end]
153
154
155 toFstDate :: [Text] -> Text
156 toFstDate ds = snd
157 $ head' "firstDate"
158 $ sortOn fst
159 $ map (\d ->
160 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
161 in (d',d)) ds
162
163 toLstDate :: [Text] -> Text
164 toLstDate ds = snd
165 $ head' "firstDate"
166 $ reverse
167 $ sortOn fst
168 $ map (\d ->
169 let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
170 in (d',d)) ds
171
172
173 getTimeScale :: Phylo -> [Char]
174 getTimeScale p = case (timeUnit $ getConfig p) of
175 Epoch _ _ _ -> "epoch"
176 Year _ _ _ -> "year"
177 Month _ _ _ -> "month"
178 Week _ _ _ -> "week"
179 Day _ _ _ -> "day"
180
181
182 -- | Get a regular & ascendante timeScale from a given list of dates
183 toTimeScale :: [Date] -> Int -> [Date]
184 toTimeScale dates step =
185 let (start,end) = findBounds dates
186 in [start, (start + step) .. end]
187
188
189 getTimeStep :: TimeUnit -> Int
190 getTimeStep time = case time of
191 Epoch _ s _ -> s
192 Year _ s _ -> s
193 Month _ s _ -> s
194 Week _ s _ -> s
195 Day _ s _ -> s
196
197 getTimePeriod :: TimeUnit -> Int
198 getTimePeriod time = case time of
199 Epoch p _ _ -> p
200 Year p _ _ -> p
201 Month p _ _ -> p
202 Week p _ _ -> p
203 Day p _ _ -> p
204
205 getTimeFrame :: TimeUnit -> Int
206 getTimeFrame time = case time of
207 Epoch _ _ f -> f
208 Year _ _ f -> f
209 Month _ _ f -> f
210 Week _ _ f -> f
211 Day _ _ f -> f
212
213 -------------
214 -- | Fis | --
215 -------------
216
217
218 -- | To find if l' is nested in l
219 isNested :: Eq a => [a] -> [a] -> Bool
220 isNested l l'
221 | null l' = True
222 | length l' > length l = False
223 | (union l l') == l = True
224 | otherwise = False
225
226
227 -- | To filter Fis with small Support but by keeping non empty Periods
228 keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
229 keepFilled f thr l = if (null $ f thr l) && (not $ null l)
230 then keepFilled f (thr - 1) l
231 else f thr l
232
233
234 traceClique :: Map (Date, Date) [PhyloClique] -> String
235 traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
236 where
237 --------------------------------------
238 cliques :: [Double]
239 cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
240 --------------------------------------
241
242
243 traceSupport :: Map (Date, Date) [PhyloClique] -> String
244 traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
245 where
246 --------------------------------------
247 supports :: [Double]
248 supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
249 --------------------------------------
250
251
252 traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
253 traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
254 <> "Support : " <> (traceSupport mFis) <> "\n"
255 <> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
256
257
258 ---------------
259 -- | Clique| --
260 ---------------
261
262
263 getCliqueSupport :: Clique -> Int
264 getCliqueSupport unit = case unit of
265 Fis s _ -> s
266 MaxClique _ _ _ -> 0
267
268 getCliqueSize :: Clique -> Int
269 getCliqueSize unit = case unit of
270 Fis _ s -> s
271 MaxClique s _ _ -> s
272
273
274 --------------
275 -- | Cooc | --
276 --------------
277
278 listToCombi' :: [a] -> [(a,a)]
279 listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
280
281 listToEqual' :: Eq a => [a] -> [(a,a)]
282 listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
283
284 listToKeys :: Eq a => [a] -> [(a,a)]
285 listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
286
287 listToMatrix :: [Int] -> Map (Int,Int) Double
288 listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
289
290 listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
291 listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
292
293 listToSeq :: Eq a => [a] -> [(a,a)]
294 listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
295
296 sumCooc :: Cooc -> Cooc -> Cooc
297 sumCooc cooc cooc' = unionWith (+) cooc cooc'
298
299 getTrace :: Cooc -> Double
300 getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
301
302 coocToDiago :: Cooc -> Cooc
303 coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
304
305 -- | To build the local cooc matrix of each phylogroup
306 ngramsToCooc :: [Int] -> [Cooc] -> Cooc
307 ngramsToCooc ngrams coocs =
308 let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
309 pairs = listToKeys ngrams
310 in filterWithKey (\k _ -> elem k pairs) cooc
311
312
313 --------------------
314 -- | PhyloGroup | --
315 --------------------
316
317 getGroupId :: PhyloGroup -> PhyloGroupId
318 getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex)
319
320 idToPrd :: PhyloGroupId -> PhyloPeriodId
321 idToPrd id = (fst . fst) id
322
323 groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
324 groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
325
326 getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
327 getPeriodPointers fil g =
328 case fil of
329 ToChilds -> g ^. phylo_groupPeriodChilds
330 ToParents -> g ^. phylo_groupPeriodParents
331 ToChildsMemory -> undefined
332 ToParentsMemory -> undefined
333
334 filterProximity :: Proximity -> Double -> Double -> Bool
335 filterProximity proximity thr local =
336 case proximity of
337 WeightedLogJaccard _ -> local >= thr
338 WeightedLogSim _ -> local >= thr
339 Hamming _ -> undefined
340
341 getProximityName :: Proximity -> String
342 getProximityName proximity =
343 case proximity of
344 WeightedLogJaccard _ -> "WLJaccard"
345 WeightedLogSim _ -> "WeightedLogSim"
346 Hamming _ -> "Hamming"
347
348 ---------------
349 -- | Phylo | --
350 ---------------
351
352 addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
353 addPointers fil pty pointers g =
354 case pty of
355 TemporalPointer -> case fil of
356 ToChilds -> g & phylo_groupPeriodChilds .~ pointers
357 ToParents -> g & phylo_groupPeriodParents .~ pointers
358 ToChildsMemory -> undefined
359 ToParentsMemory -> undefined
360 LevelPointer -> case fil of
361 ToChilds -> g & phylo_groupLevelChilds .~ pointers
362 ToParents -> g & phylo_groupLevelParents .~ pointers
363 ToChildsMemory -> undefined
364 ToParentsMemory -> undefined
365
366 toPointer' :: Double -> Pointer -> Pointer'
367 toPointer' thr pt = (fst pt,(thr,snd pt))
368
369
370 addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
371 addMemoryPointers fil pty thr pointers g =
372 case pty of
373 TemporalPointer -> case fil of
374 ToChilds -> undefined
375 ToParents -> undefined
376 ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
377 ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
378 LevelPointer -> undefined
379
380
381 getPeriodIds :: Phylo -> [(Date,Date)]
382 getPeriodIds phylo = sortOn fst
383 $ keys
384 $ phylo ^. phylo_periods
385
386 getLevelParentId :: PhyloGroup -> PhyloGroupId
387 getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents
388
389 getLastLevel :: Phylo -> Level
390 getLastLevel phylo = last' "lastLevel" $ getLevels phylo
391
392 getLevels :: Phylo -> [Level]
393 getLevels phylo = nub
394 $ map snd
395 $ keys $ view ( phylo_periods
396 . traverse
397 . phylo_periodLevels ) phylo
398
399 getSeaElevation :: Phylo -> SeaElevation
400 getSeaElevation phylo = seaElevation (getConfig phylo)
401
402
403 getConfig :: Phylo -> Config
404 getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
405
406
407 setConfig :: Config -> Phylo -> Phylo
408 setConfig config phylo = phylo
409 & phylo_param .~ (PhyloParam
410 ((phylo ^. phylo_param) ^. phyloParam_version)
411 ((phylo ^. phylo_param) ^. phyloParam_software)
412 config)
413
414 -- & phylo_param & phyloParam_config & phyloParam_config .~ config
415
416
417 getRoots :: Phylo -> Vector Ngrams
418 getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
419
420 getSources :: Phylo -> Vector Text
421 getSources phylo = _sources (phylo ^. phylo_sources)
422
423 phyloToLastBranches :: Phylo -> [[PhyloGroup]]
424 phyloToLastBranches phylo = elems
425 $ fromListWith (++)
426 $ map (\g -> (g ^. phylo_groupBranchId, [g]))
427 $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
428
429 getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
430 getGroupsFromLevel lvl phylo =
431 elems $ view ( phylo_periods
432 . traverse
433 . phylo_periodLevels
434 . traverse
435 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
436 . phylo_levelGroups ) phylo
437
438
439 getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
440 getGroupsFromLevelPeriods lvl periods phylo =
441 elems $ view ( phylo_periods
442 . traverse
443 . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
444 . phylo_periodLevels
445 . traverse
446 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
447 . phylo_levelGroups ) phylo
448
449
450 getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
451 getGroupsFromPeriods lvl periods =
452 elems $ view ( traverse
453 . phylo_periodLevels
454 . traverse
455 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
456 . phylo_levelGroups ) periods
457
458
459 updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
460 updatePhyloGroups lvl m phylo =
461 over ( phylo_periods
462 . traverse
463 . phylo_periodLevels
464 . traverse
465 . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
466 . phylo_levelGroups
467 . traverse
468 ) (\g ->
469 let id = getGroupId g
470 in
471 if member id m
472 then m ! id
473 else g ) phylo
474
475 updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
476 updatePeriods periods' phylo =
477 over (phylo_periods . traverse)
478 (\prd ->
479 let prd' = periods' ! (prd ^. phylo_periodPeriod)
480 lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
481 in prd & phylo_periodPeriod' .~ prd'
482 & phylo_periodLevels .~ lvls
483 ) phylo
484
485
486 traceToPhylo :: Level -> Phylo -> Phylo
487 traceToPhylo lvl phylo =
488 trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
489 <> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
490 <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
491
492 --------------------
493 -- | Clustering | --
494 --------------------
495
496 mergeBranchIds :: [[Int]] -> [Int]
497 mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
498 where
499 -- | 2) find the most Up Left ids in the hierarchy of similarity
500 -- mostUpLeft :: [[Int]] -> [[Int]]
501 -- mostUpLeft ids' =
502 -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
503 -- inf = (fst . minimum) groupIds
504 -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
505 -- | 1) find the most frequent ids
506 mostFreq' :: [[Int]] -> [[Int]]
507 mostFreq' ids' =
508 let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
509 sup = (fst . maximum) groupIds
510 in map snd $ filter (\gIds -> fst gIds == sup) groupIds
511
512
513 mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
514 mergeMeta bId groups =
515 let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
516 in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
517
518
519 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
520 groupsToBranches groups =
521 {- run the related component algorithm -}
522 let egos = map (\g -> [getGroupId g]
523 ++ (map fst $ g ^. phylo_groupPeriodParents)
524 ++ (map fst $ g ^. phylo_groupPeriodChilds)
525 ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
526 graph = relatedComponents egos
527 {- update each group's branch id -}
528 in map (\ids ->
529 let groups' = elems $ restrictKeys groups (Set.fromList ids)
530 bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
531 in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
532
533 relatedComponents :: Ord a => [[a]] -> [[a]]
534 relatedComponents graph = foldl' (\acc groups ->
535 if (null acc)
536 then acc ++ [groups]
537 else
538 let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
539 in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
540
541 toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
542 toRelatedComponents nodes edges =
543 let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
544 clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
545 in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
546
547
548 traceSynchronyEnd :: Phylo -> Phylo
549 traceSynchronyEnd phylo =
550 trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
551 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
552 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
553 <> "\n" ) phylo
554
555 traceSynchronyStart :: Phylo -> Phylo
556 traceSynchronyStart phylo =
557 trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
558 <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
559 <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
560 <> "\n" ) phylo
561
562
563 -------------------
564 -- | Proximity | --
565 -------------------
566
567 getSensibility :: Proximity -> Double
568 getSensibility proxi = case proxi of
569 WeightedLogJaccard s -> s
570 WeightedLogSim s -> s
571 Hamming _ -> undefined
572
573 ----------------
574 -- | Branch | --
575 ----------------
576
577 intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
578 intersectInit acc lst lst' =
579 if (null lst) || (null lst')
580 then acc
581 else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
582 then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
583 else acc
584
585 branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
586 branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
587
588 ngramsInBranches :: [[PhyloGroup]] -> [Int]
589 ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
590
591
592 traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
593 traceMatchSuccess thr qua qua' nextBranches =
594 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
595 $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
596 <> ",(1.." <> show (length nextBranches) <> ")]"
597 <> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
598 <> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
599 <> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
600
601
602 traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
603 traceMatchFailure thr qua qua' branches =
604 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
605 <> ",(1.." <> show (length branches) <> ")]"
606 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
607 <> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
608 ) branches
609
610
611 traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
612 traceMatchNoSplit branches =
613 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
614 <> ",(1.." <> show (length branches) <> ")]"
615 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
616 <> " - unable to split in smaller branches" <> "\n"
617 ) branches
618
619
620 traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
621 traceMatchLimit branches =
622 trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
623 <> ",(1.." <> show (length branches) <> ")]"
624 <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
625 <> " - unable to increase the threshold above 1" <> "\n"
626 ) branches
627
628
629 traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
630 traceMatchEnd groups =
631 trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
632 <> " branches and " <> show (length groups) <> " groups" <> "\n") groups
633
634
635 traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
636 traceTemporalMatching groups =
637 trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
638
639
640 traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
641 traceGroupsProxi m =
642 trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m