]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
add rebranching to link distante branches
[gargantext.git] / src / Gargantext / Viz / Phylo / Aggregates / Cooc.hs
1 {-|
2 Module : Gargantext.Viz.Phylo.Tools
3 Description : Phylomemy Tools to build/manage it
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE FlexibleContexts #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Viz.Phylo.Aggregates.Cooc
18 where
19
20 import Data.List (union,concat,nub,sort)
21 import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
22 import Data.Set (Set)
23 import Data.Vector (Vector)
24 import Gargantext.Prelude
25 import Gargantext.Viz.Phylo
26 import Gargantext.Viz.Phylo.Tools
27 import qualified Data.Map as Map
28 import qualified Data.Set as Set
29
30
31 -- | To transform the Fis into a full coocurency Matrix in a Phylo
32 fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
33 fisToCooc m p = map (/docs)
34 $ foldl (\mem x -> adjust (+1) x mem) cooc
35 $ concat
36 $ map (\x -> listToDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x)
37 $ (concat . elems) m
38 where
39 --------------------------------------
40 fisNgrams :: [Ngrams]
41 fisNgrams = foldl (\mem x -> union mem $ (Set.toList . getClique) x) [] $ (concat . elems) m
42 --------------------------------------
43 docs :: Double
44 docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
45 --------------------------------------
46 cooc :: Map (Int, Int) (Double)
47 cooc = Map.fromList $ map (\x -> (x,0)) (listToDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams)
48 --------------------------------------
49
50
51
52 -- | To transform a tuple of group's information into a coocurency Matrix
53 toCooc :: [([Int],Double)] -> Map (Int, Int) Double
54 toCooc l = map (/docs)
55 $ foldl (\mem x -> adjust (+1) x mem) cooc
56 $ concat
57 $ map (\x -> listToFullCombi $ fst x) l
58 where
59 --------------------------------------
60 idx :: [Int]
61 idx = nub $ concat $ map fst l
62 --------------------------------------
63 docs :: Double
64 docs = sum $ map snd l
65 --------------------------------------
66 cooc :: Map (Int, Int) (Double)
67 cooc = Map.fromList $ map (\x -> (x,0)) $ listToFullCombi idx
68 --------------------------------------
69
70
71 -- | To reduce a coocurency Matrix to some keys
72 getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
73 getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
74 && (elem (snd k) idx)) cooc
75
76
77 -- | To get a coocurency Matrix related to a given list of Periods
78 getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
79 getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
80 where
81 --------------------------------------
82 -- | Here we need to go back to the level 1 (aka : the Fis level)
83 gs :: [PhyloGroup]
84 gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
85 --------------------------------------
86
87
88
89
90
91 -- | To transform a list of index into a cooc matrix
92 listToCooc :: [Int] -> Map (Int,Int) Double
93 listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
94
95
96 -- | To transform a list of ngrams into a list of indexes
97 ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
98 ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
99
100
101 -- | To build the cooc matrix by years out of the corpus
102 docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
103 docsToCooc docs fdt = fromListWith sumCooc
104 $ map (\(d,l) -> (d, listToCooc l))
105 $ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
106
107
108 -- | To sum all the docs produced during a list of years
109 sumDocsByYears :: Set Date -> Map Date Double -> Double
110 sumDocsByYears years m = sum $ elems $ restrictKeys m years
111
112
113 -- | To get the cooc matrix of a group
114 groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
115 groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
116
117
118 -- | To get the union of the cooc matrix of two groups
119 unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
120 unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
121
122
123
124
125 -- phyloCooc :: Map (Int, Int) Double
126 -- phyloCooc = fisToCooc phyloFis phylo1_0_1