module Gargantext.Core.Viz.Phylo.PhyloTools where
-import Data.Vector (Vector, elemIndex)
-import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy)
+import Control.Lens hiding (Level)
+import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
+import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint)
-import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
import Data.String (String)
-import Data.Text (Text, unwords)
-
+import Data.Text (Text,unpack)
+import Data.Vector (Vector, elemIndex)
+import Debug.Trace (trace)
+import Gargantext.Core.Viz.Phylo
import Gargantext.Prelude
-import Gargantext.Core.Viz.AdaptativePhylo
+import Prelude (floor,read)
import Text.Printf
-
-
-import Debug.Trace (trace)
-import Control.Lens hiding (Level)
-
-import qualified Data.Vector as Vector
import qualified Data.List as List
-import qualified Data.Set as Set
import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.Text as Text
+import qualified Data.Vector as Vector
------------
-- | Io | --
-- | To print an important message as an IO()
printIOMsg :: String -> IO ()
-printIOMsg msg =
+printIOMsg msg =
putStrLn ( "\n"
- <> "------------"
+ <> "------------"
<> "\n"
<> "-- | " <> msg <> "\n" )
-- | Misc | --
--------------
+-- truncate' :: Double -> Int -> Double
+-- truncate' x n = (fromIntegral (floor (x * t))) / t
+-- where t = 10^n
+
+truncate' :: Double -> Int -> Double
+truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
+ where
+ --------------
+ t :: Double
+ t = 10 ^n
+
+getInMap :: Int -> Map Int Double -> Double
+getInMap k m =
+ if (member k m)
+ then m ! k
+ else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
+-- | To transform a list of sources into a list of sources' index
+sourcesToIdx :: [Text] -> Vector Text -> [Int]
+sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
+
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
-ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
+ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
+
+idxToLabel :: [Int] -> String
+idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
+idxToLabel' :: [Double] -> String
+idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
findBounds :: [Date] -> (Date,Date)
-findBounds dates =
+findBounds [] = panic "[G.C.V.P.PhyloTools] empty dates for find bounds"
+findBounds dates =
let dates' = sort dates
in (head' "findBounds" dates', last' "findBounds" dates')
toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
-toPeriods dates p s =
+toPeriods dates p s =
let (start,end) = findBounds dates
- in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
+ in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
$ chunkAlong p s [start .. end]
+toFstDate :: [Text] -> Text
+toFstDate ds = snd
+ $ head' "firstDate"
+ $ sortOn fst
+ $ map (\d ->
+ let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
+ in (d',d)) ds
+
+toLstDate :: [Text] -> Text
+toLstDate ds = snd
+ $ head' "firstDate"
+ $ reverse
+ $ sortOn fst
+ $ map (\d ->
+ let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
+ in (d',d)) ds
+
+
+getTimeScale :: Phylo -> [Char]
+getTimeScale p = case (timeUnit $ getConfig p) of
+ Epoch _ _ _ -> "epoch"
+ Year _ _ _ -> "year"
+ Month _ _ _ -> "month"
+ Week _ _ _ -> "week"
+ Day _ _ _ -> "day"
+
+
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale :: [Date] -> Int -> [Date]
-toTimeScale dates step =
+toTimeScale dates step =
let (start,end) = findBounds dates
in [start, (start + step) .. end]
getTimeStep :: TimeUnit -> Int
-getTimeStep time = case time of
- Year _ s _ -> s
+getTimeStep time = case time of
+ Epoch _ s _ -> s
+ Year _ s _ -> s
+ Month _ s _ -> s
+ Week _ s _ -> s
+ Day _ s _ -> s
getTimePeriod :: TimeUnit -> Int
-getTimePeriod time = case time of
- Year p _ _ -> p
+getTimePeriod time = case time of
+ Epoch p _ _ -> p
+ Year p _ _ -> p
+ Month p _ _ -> p
+ Week p _ _ -> p
+ Day p _ _ -> p
getTimeFrame :: TimeUnit -> Int
-getTimeFrame time = case time of
- Year _ _ f -> f
+getTimeFrame time = case time of
+ Epoch _ _ f -> f
+ Year _ _ f -> f
+ Month _ _ f -> f
+ Week _ _ f -> f
+ Day _ _ f -> f
-------------
-- | Fis | --
| null l' = True
| length l' > length l = False
| (union l l') == l = True
- | otherwise = False
+ | otherwise = False
-- | To filter Fis with small Support but by keeping non empty Periods
--------------------------------------
cliques :: [Double]
cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
- --------------------------------------
+ --------------------------------------
traceSupport :: Map (Date, Date) [PhyloClique] -> String
--------------------------------------
supports :: [Double]
supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
- --------------------------------------
+ --------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
getCliqueSupport :: Clique -> Int
-getCliqueSupport unit = case unit of
+getCliqueSupport unit = case unit of
Fis s _ -> s
- MaxClique _ -> 0
+ MaxClique _ _ _ -> 0
getCliqueSize :: Clique -> Int
-getCliqueSize unit = case unit of
+getCliqueSize unit = case unit of
Fis _ s -> s
- MaxClique s -> s
+ MaxClique s _ _ -> s
--------------
sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc'
-getTrace :: Cooc -> Double
+getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc
-- | PhyloGroup | --
--------------------
-getGroupId :: PhyloGroup -> PhyloGroupId
-getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
+getGroupId :: PhyloGroup -> PhyloGroupId
+getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex)
idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd id = (fst . fst) id
-getGroupThr :: PhyloGroup -> Double
-getGroupThr group = last' "getGroupThr" ((group ^. phylo_groupMeta) ! "breaks")
-
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
-getPeriodPointers fil group =
- case fil of
- ToChilds -> group ^. phylo_groupPeriodChilds
- ToParents -> group ^. phylo_groupPeriodParents
+getPeriodPointers fil g =
+ case fil of
+ ToChilds -> g ^. phylo_groupPeriodChilds
+ ToParents -> g ^. phylo_groupPeriodParents
+ ToChildsMemory -> undefined
+ ToParentsMemory -> undefined
filterProximity :: Proximity -> Double -> Double -> Bool
-filterProximity proximity thr local =
+filterProximity proximity thr local =
case proximity of
WeightedLogJaccard _ -> local >= thr
- Hamming -> undefined
+ WeightedLogSim _ -> local >= thr
+ Hamming _ -> undefined
getProximityName :: Proximity -> String
getProximityName proximity =
case proximity of
WeightedLogJaccard _ -> "WLJaccard"
- Hamming -> "Hamming"
+ WeightedLogSim _ -> "WeightedLogSim"
+ Hamming _ -> "Hamming"
---------------
-- | Phylo | --
---------------
addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
-addPointers fil pty pointers group =
- case pty of
- TemporalPointer -> case fil of
- ToChilds -> group & phylo_groupPeriodChilds .~ pointers
- ToParents -> group & phylo_groupPeriodParents .~ pointers
- LevelPointer -> case fil of
- ToChilds -> group & phylo_groupLevelChilds .~ pointers
- ToParents -> group & phylo_groupLevelParents .~ pointers
+addPointers fil pty pointers g =
+ case pty of
+ TemporalPointer -> case fil of
+ ToChilds -> g & phylo_groupPeriodChilds .~ pointers
+ ToParents -> g & phylo_groupPeriodParents .~ pointers
+ ToChildsMemory -> undefined
+ ToParentsMemory -> undefined
+ LevelPointer -> case fil of
+ ToChilds -> g & phylo_groupLevelChilds .~ pointers
+ ToParents -> g & phylo_groupLevelParents .~ pointers
+ ToChildsMemory -> undefined
+ ToParentsMemory -> undefined
+
+toPointer' :: Double -> Pointer -> Pointer'
+toPointer' thr pt = (fst pt,(thr,snd pt))
+
+
+addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
+addMemoryPointers fil pty thr pointers g =
+ case pty of
+ TemporalPointer -> case fil of
+ ToChilds -> undefined
+ ToParents -> undefined
+ ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
+ ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
+ LevelPointer -> undefined
getPeriodIds :: Phylo -> [(Date,Date)]
$ keys
$ phylo ^. phylo_periods
-getLevelParentId :: PhyloGroup -> PhyloGroupId
+getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents
getLastLevel :: Phylo -> Level
getLastLevel phylo = last' "lastLevel" $ getLevels phylo
getLevels :: Phylo -> [Level]
-getLevels phylo = nub
+getLevels phylo = nub
$ map snd
$ keys $ view ( phylo_periods
. traverse
getSeaElevation phylo = seaElevation (getConfig phylo)
-getConfig :: Phylo -> Config
+getConfig :: Phylo -> PhyloConfig
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
+setConfig :: PhyloConfig -> Phylo -> Phylo
+setConfig config phylo = phylo
+ & phylo_param .~ (PhyloParam
+ ((phylo ^. phylo_param) ^. phyloParam_version)
+ ((phylo ^. phylo_param) ^. phyloParam_software)
+ config)
+
+-- & phylo_param & phyloParam_config & phyloParam_config .~ config
+
+
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
+getSources :: Phylo -> Vector Text
+getSources phylo = _sources (phylo ^. phylo_sources)
+
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
-phyloToLastBranches phylo = elems
+phyloToLastBranches phylo = elems
$ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
-getGroupsFromLevel lvl phylo =
+getGroupsFromLevel lvl phylo =
elems $ view ( phylo_periods
. traverse
. phylo_periodLevels
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
-getGroupsFromLevelPeriods lvl periods phylo =
+getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
- . phylo_levelGroups ) phylo
+ . phylo_levelGroups ) phylo
getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
-getGroupsFromPeriods lvl periods =
+getGroupsFromPeriods lvl periods =
elems $ view ( traverse
. phylo_periodLevels
. traverse
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
-updatePhyloGroups lvl m phylo =
+updatePhyloGroups lvl m phylo =
over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups
- . traverse
- ) (\group ->
- let id = getGroupId group
- in
- if member id m
+ . traverse
+ ) (\g ->
+ let id = getGroupId g
+ in
+ if member id m
then m ! id
- else group ) phylo
+ else g ) phylo
+
+updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
+updatePeriods periods' phylo =
+ over (phylo_periods . traverse)
+ (\prd ->
+ let prd' = periods' ! (prd ^. phylo_periodPeriod)
+ lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
+ in prd & phylo_periodPeriod' .~ prd'
+ & phylo_periodLevels .~ lvls
+ ) phylo
traceToPhylo :: Level -> Phylo -> Phylo
-traceToPhylo lvl phylo =
+traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
- <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
+ <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
--------------------
-- | Clustering | --
--------------------
+mergeBranchIds :: [[Int]] -> [Int]
+mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
+ where
+ -- | 2) find the most Up Left ids in the hierarchy of similarity
+ -- mostUpLeft :: [[Int]] -> [[Int]]
+ -- mostUpLeft ids' =
+ -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
+ -- inf = (fst . minimum) groupIds
+ -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
+ -- | 1) find the most frequent ids
+ mostFreq' :: [[Int]] -> [[Int]]
+ mostFreq' ids' =
+ let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
+ sup = (fst . maximum) groupIds
+ in map snd $ filter (\gIds -> fst gIds == sup) groupIds
+
+
+mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
+mergeMeta bId groups =
+ let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
+ in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
+
+
+groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
+groupsToBranches groups =
+ {- run the related component algorithm -}
+ let egos = map (\g -> [getGroupId g]
+ ++ (map fst $ g ^. phylo_groupPeriodParents)
+ ++ (map fst $ g ^. phylo_groupPeriodChilds)
+ ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
+ graph = relatedComponents egos
+ {- update each group's branch id -}
+ in map (\ids ->
+ let groups' = elems $ restrictKeys groups (Set.fromList ids)
+ bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
+ in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
+
relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' (\acc groups ->
if (null acc)
then acc ++ [groups]
- else
+ else
let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
-toRelatedComponents nodes edges =
+toRelatedComponents nodes edges =
let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
- clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
- in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
+ clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
+ in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
traceSynchronyEnd :: Phylo -> Phylo
-traceSynchronyEnd phylo =
- trace ( "\n" <> "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
+traceSynchronyEnd phylo =
+ trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo
-traceSynchronyStart phylo =
- trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
+traceSynchronyStart phylo =
+ trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
- <> "\n" ) phylo
+ <> "\n" ) phylo
-------------------
-------------------
getSensibility :: Proximity -> Double
-getSensibility proxi = case proxi of
+getSensibility proxi = case proxi of
WeightedLogJaccard s -> s
- Hamming -> undefined
+ WeightedLogSim s -> s
+ Hamming _ -> undefined
----------------
-- | Branch | --
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - unable to increase the threshold above 1" <> "\n"
- ) branches
+ ) branches
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
-traceTemporalMatching groups =
+traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
-traceGroupsProxi m =
+traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m