2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Viz.Phylo.Aggregates.Cooc
20 import Data.List (union,concat,nub,sort)
21 import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
23 import Data.Vector (Vector)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Tools
27 import qualified Data.Map as Map
28 import qualified Data.Set as Set
31 -- | To transform the Fis into a full coocurency Matrix in a Phylo
32 fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
33 fisToCooc m p = map (/docs)
34 $ foldl (\mem x -> adjust (+1) x mem) cooc
36 $ map (\x -> listToDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x)
39 --------------------------------------
41 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . getClique) x) [] $ (concat . elems) m
42 --------------------------------------
44 docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
45 --------------------------------------
46 cooc :: Map (Int, Int) (Double)
47 cooc = Map.fromList $ map (\x -> (x,0)) (listToDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams)
48 --------------------------------------
52 -- | To transform a tuple of group's information into a coocurency Matrix
53 toCooc :: [([Int],Double)] -> Map (Int, Int) Double
54 toCooc l = map (/docs)
55 $ foldl (\mem x -> adjust (+1) x mem) cooc
57 $ map (\x -> listToFullCombi $ fst x) l
59 --------------------------------------
61 idx = nub $ concat $ map fst l
62 --------------------------------------
64 docs = sum $ map snd l
65 --------------------------------------
66 cooc :: Map (Int, Int) (Double)
67 cooc = Map.fromList $ map (\x -> (x,0)) $ listToFullCombi idx
68 --------------------------------------
71 -- | To reduce a coocurency Matrix to some keys
72 getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
73 getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
74 && (elem (snd k) idx)) cooc
77 -- | To get a coocurency Matrix related to a given list of Periods
78 getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
79 getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
81 --------------------------------------
82 -- | Here we need to go back to the level 1 (aka : the Fis level)
84 gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
85 --------------------------------------
91 -- | To transform a list of index into a cooc matrix
92 listToCooc :: [Int] -> Map (Int,Int) Double
93 listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
96 -- | To transform a list of ngrams into a list of indexes
97 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
98 ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
101 -- | To build the cooc matrix by years out of the corpus
102 docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
103 docsToCooc docs fdt = fromListWith sumCooc
104 $ map (\(d,l) -> (d, listToCooc l))
105 $ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
108 -- | To sum all the docs produced during a list of years
109 sumDocsByYears :: Set Date -> Map Date Double -> Double
110 sumDocsByYears years m = sum $ elems $ restrictKeys m years
113 -- | To get the cooc matrix of a group
114 groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
115 groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
118 -- | To get the union of the cooc matrix of two groups
119 unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
120 unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
125 -- phyloCooc :: Map (Int, Int) Double
126 -- phyloCooc = fisToCooc phyloFis phylo1_0_1