]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[FEAT] Distributional Measure Similarity in Graph Flow (WIP)
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Viz.Graph.Tools
13 where
14
15 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
16 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
17 import Data.HashMap.Strict (HashMap)
18 import Data.Map (Map)
19 import Data.Text (Text)
20 import Debug.Trace (trace)
21 import GHC.Float (sin, cos)
22 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
23 import Gargantext.Core.Methods.Distances (Distance(..), measure)
24 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
25 import Gargantext.Core.Statistics
26 import Gargantext.Core.Viz.Graph
27 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
28 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
29 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
30 import Gargantext.Prelude
31 import IGraph.Random -- (Gen(..))
32 import qualified Data.HashMap.Strict as HashMap
33 import qualified Data.List as List
34 import qualified Data.Map as Map
35 import qualified Data.Set as Set
36 import qualified Data.Vector.Storable as Vec
37 import qualified IGraph as Igraph
38 import qualified IGraph.Algorithms.Layout as Layout
39
40 type Threshold = Double
41
42
43 cooc2graph' :: Ord t => Distance
44 -> Double
45 -> Map (t, t) Int
46 -> Map (Index, Index) Double
47 cooc2graph' distance threshold myCooc
48 = Map.filter (> threshold)
49 $ mat2map
50 $ measure distance
51 $ case distance of
52 Conditional -> map2mat Triangular 0 (Map.size ti)
53 Distributional -> map2mat Square 0 (Map.size ti)
54 $ Map.filter (> 1) myCooc'
55
56 where
57 (ti, _) = createIndices myCooc
58 myCooc' = toIndex ti myCooc
59
60
61 data PartitionMethod = Louvain | Spinglass
62
63 cooc2graphWith :: PartitionMethod
64 -> Distance
65 -> Threshold
66 -> HashMap (NgramsTerm, NgramsTerm) Int
67 -> IO Graph
68 cooc2graphWith Louvain = cooc2graphWith' (cLouvain "1")
69 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
70
71
72 cooc2graphWith' :: ToComId a
73 => Partitions a
74 -> Distance
75 -> Threshold
76 -> HashMap (NgramsTerm, NgramsTerm) Int
77 -> IO Graph
78 cooc2graphWith' doPartitions distance threshold myCooc = do
79 let
80 -- TODO remove below
81 theMatrix = Map.fromList $ HashMap.toList myCooc
82
83 (ti, _) = createIndices theMatrix
84 myCooc' = toIndex ti theMatrix
85 matCooc = case distance of -- Shape of the Matrix
86 Conditional -> map2mat Triangular 0 (Map.size ti)
87 Distributional -> map2mat Square 0 (Map.size ti)
88 $ case distance of -- Removing the Diagonal ?
89 Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
90 Distributional -> identity
91 $ Map.filter (>1) myCooc'
92
93 printDebug "myCooc'" myCooc'
94 printDebug "ti" (Map.size ti)
95
96 let
97 similarities = measure distance matCooc
98
99 printDebug "Similarities" similarities
100
101 let
102 distanceMap = case distance of
103 Conditional -> Map.filter (> threshold)
104 Distributional -> Map.filter (> 0)
105 $ mat2map similarities
106
107 nodesApprox :: Int
108 nodesApprox = n'
109 where
110 (as, bs) = List.unzip $ Map.keys distanceMap
111 n' = Set.size $ Set.fromList $ as <> bs
112 ClustersParams rivers _level = clustersParams nodesApprox
113
114 partitions <- if (Map.size distanceMap > 0)
115 then doPartitions distanceMap
116 else panic "Text.Flow: DistanceMap is empty"
117
118 let
119 -- bridgeness' = distanceMap
120 bridgeness' = trace ("Rivers: " <> show rivers)
121 $ bridgeness rivers partitions distanceMap
122
123 confluence' = confluence (Map.keys bridgeness') 3 True False
124
125 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
126 myCooc' bridgeness' confluence' partitions
127
128 ------------------------------------------------------------------------
129 ------------------------------------------------------------------------
130 data ClustersParams = ClustersParams { bridgness :: Double
131 , louvain :: Text
132 } deriving (Show)
133
134 clustersParams :: Int -> ClustersParams
135 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
136 {- where
137 y | x < 100 = "0.000001"
138 | x < 350 = "0.000001"
139 | x < 500 = "0.000001"
140 | x < 1000 = "0.000001"
141 | otherwise = "1"
142 -}
143
144 ----------------------------------------------------------
145 -- | From data to Graph
146 data2graph :: ToComId a
147 => [(Text, Int)]
148 -> Map (Int, Int) Int
149 -> Map (Int, Int) Double
150 -> Map (Int, Int) Double
151 -> [a]
152 -> Graph
153 data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
154 where
155
156 community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
157
158 nodes = map (setCoord ForceAtlas labels bridge)
159 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
160 , node_type = Terms -- or Unknown
161 , node_id = cs (show n)
162 , node_label = l
163 , node_x_coord = 0
164 , node_y_coord = 0
165 , node_attributes =
166 Attributes { clust_default = maybe 0 identity
167 (Map.lookup n community_id_by_node_id) } }
168 )
169 | (l, n) <- labels
170 , Set.member n $ Set.fromList
171 $ List.concat
172 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
173 $ Map.toList bridge
174 ]
175
176 edges = [ Edge { edge_source = cs (show s)
177 , edge_target = cs (show t)
178 , edge_weight = d
179 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
180 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
181 , edge_id = cs (show i)
182 }
183 | (i, ((s,t), d)) <- zip ([0..]::[Integer] )
184 (Map.toList bridge)
185 , s /= t, d > 0
186 ]
187
188
189 ------------------------------------------------------------------------
190
191 data Layout = KamadaKawai | ACP | ForceAtlas
192
193
194 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
195 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
196 where
197 (x,y) = f i
198
199
200 -- | ACP
201 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
202 setCoord l labels m (n,node) = node { node_x_coord = x
203 , node_y_coord = y
204 }
205 where
206 (x,y) = getCoord l labels m n
207
208
209 getCoord :: Ord a
210 => Layout
211 -> [(a, Int)]
212 -> Map (Int, Int) Double
213 -> Int
214 -> (Double, Double)
215 getCoord KamadaKawai _ _m _n = undefined -- layout m n
216
217 getCoord ForceAtlas _ _ n = (sin d, cos d)
218 where
219 d = fromIntegral n
220
221 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
222 $ Map.lookup n
223 $ pcaReduceTo (Dimension 2)
224 $ mapArray labels m
225 where
226 to2d :: Vec.Vector Double -> (Double, Double)
227 to2d v = (x',y')
228 where
229 ds = take 2 $ Vec.toList v
230 x' = head' "to2d" ds
231 y' = last' "to2d" ds
232
233 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
234 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
235 where
236 ns = map snd items
237
238 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
239 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
240 ------------------------------------------------------------------------
241
242 -- | KamadaKawai Layout
243 -- TODO TEST: check labels, nodeId and coordinates
244 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
245 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
246 where
247 coord :: (Map Int (Double,Double))
248 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
249 --p = Layout.defaultLGL
250 p = Layout.kamadaKawai
251 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
252 -----------------------------------------------------------------------------
253 -----------------------------------------------------------------------------
254 -----------------------------------------------------------------------------
255 -- Debug
256 {-
257 -- measure logDistributional
258 dataDebug = map2mat Square (0::Int) 19 dataBug'
259
260 dataBug' :: Map (Int, Int) Int
261 dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
262 -}