]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[TOOLING] sql queries to remove unused documents and ngrams from database. Usage...
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances (Distance(..), measure)
25 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
26 import Gargantext.Core.Statistics
27 import Gargantext.Core.Viz.Graph
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, ClusterNode)
31 import Gargantext.Prelude
32 import IGraph.Random -- (Gen(..))
33 import qualified Data.HashMap.Strict as HashMap
34 import qualified Data.List as List
35 import qualified Data.Map as Map
36 import qualified Data.Set as Set
37 import qualified Data.Vector.Storable as Vec
38 import qualified IGraph as Igraph
39 import qualified IGraph.Algorithms.Layout as Layout
40
41
42 -------------------------------------------------------------
43
44 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
45 defaultClustering = spinglass 1
46
47 -------------------------------------------------------------
48
49 type Threshold = Double
50
51 cooc2graph' :: Ord t => Distance
52 -> Double
53 -> Map (t, t) Int
54 -> Map (Index, Index) Double
55 cooc2graph' distance threshold myCooc
56 = Map.filter (> threshold)
57 $ mat2map
58 $ measure distance
59 $ case distance of
60 Conditional -> map2mat Triangle 0 tiSize
61 Distributional -> map2mat Square 0 tiSize
62 $ Map.filter (> 1) myCooc'
63
64 where
65 (ti, _) = createIndices myCooc
66 tiSize = Map.size ti
67 myCooc' = toIndex ti myCooc
68
69
70 data PartitionMethod = Louvain | Spinglass
71
72 -- | coocurrences graph computation
73 cooc2graphWith :: PartitionMethod
74 -> Distance
75 -> Threshold
76 -> HashMap (NgramsTerm, NgramsTerm) Int
77 -> IO Graph
78 cooc2graphWith Louvain = undefined -- TODO use IGraph bindings
79 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
80
81 cooc2graph'' :: Ord t => Distance
82 -> Double
83 -> Map (t, t) Int
84 -> Map (Index, Index) Double
85 cooc2graph'' distance threshold myCooc = neighbourMap
86 where
87 (ti, _) = createIndices myCooc
88 myCooc' = toIndex ti myCooc
89 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
90 distanceMat = measure distance matCooc
91 neighbourMap = filterByNeighbours threshold
92 $ mat2map distanceMat
93
94
95 -- Quentin
96 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
97 filterByNeighbours threshold distanceMap = filteredMap
98 where
99 indexes :: [Index]
100 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
101 filteredMap :: Map (Index, Index) Double
102 filteredMap = Map.fromList
103 $ List.concat
104 $ map (\idx ->
105 let selected = List.reverse
106 $ List.sortOn snd
107 $ Map.toList
108 $ Map.filter (> 0)
109 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
110 in List.take (round threshold) selected
111 ) indexes
112
113
114 doDistanceMap :: Distance
115 -> Threshold
116 -> HashMap (NgramsTerm, NgramsTerm) Int
117 -> (Map (Int,Int) Double, Map (Index, Index) Int, Map NgramsTerm Index)
118 doDistanceMap distance threshold myCooc = (distanceMap, myCooc', ti)
119 where
120 -- TODO remove below
121 theMatrix = Map.fromList
122 $ HashMap.toList myCooc
123
124 (ti, _) = createIndices theMatrix
125 tiSize = Map.size ti
126 myCooc' = toIndex ti theMatrix
127 matCooc = case distance of -- Shape of the Matrix
128 Conditional -> map2mat Triangle 0 tiSize
129 Distributional -> map2mat Square 0 tiSize
130 $ case distance of -- Removing the Diagonal ?
131 Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
132 Distributional -> identity
133 $ Map.filter (>1) myCooc'
134
135 similarities = measure distance matCooc
136 links = round (let n :: Double = fromIntegral tiSize in n * log n)
137
138 distanceMap = Map.fromList $ List.take links
139 $ List.sortOn snd
140 $ Map.toList
141 $ case distance of
142 Conditional -> Map.filter (> threshold)
143 Distributional -> Map.filter (> 0)
144 $ mat2map similarities
145
146 cooc2graphWith' :: ToComId a
147 => Partitions a
148 -> Distance
149 -> Threshold
150 -> HashMap (NgramsTerm, NgramsTerm) Int
151 -> IO Graph
152 cooc2graphWith' doPartitions distance threshold myCooc = do
153 let
154 (distanceMap, myCooc', ti) = doDistanceMap distance threshold myCooc
155
156 nodesApprox :: Int
157 nodesApprox = n'
158 where
159 (as, bs) = List.unzip $ Map.keys distanceMap
160 n' = Set.size $ Set.fromList $ as <> bs
161 ClustersParams rivers _level = clustersParams nodesApprox
162
163 {- -- Debug
164 saveAsFileDebug "debug/distanceMap" distanceMap
165 printDebug "similarities" similarities
166 -}
167
168 partitions <- if (Map.size distanceMap > 0)
169 then doPartitions distanceMap
170 else panic "Text.Flow: DistanceMap is empty"
171
172 let
173 -- bridgeness' = distanceMap
174 bridgeness' = trace ("Rivers: " <> show rivers)
175 $ bridgeness rivers partitions distanceMap
176
177 confluence' = confluence (Map.keys bridgeness') 3 True False
178
179 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
180 myCooc' bridgeness' confluence' partitions
181
182
183 ------------------------------------------------------------------------
184 ------------------------------------------------------------------------
185 data ClustersParams = ClustersParams { bridgness :: Double
186 , louvain :: Text
187 } deriving (Show)
188
189 clustersParams :: Int -> ClustersParams
190 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
191 {- where
192 y | x < 100 = "0.000001"
193 | x < 350 = "0.000001"
194 | x < 500 = "0.000001"
195 | x < 1000 = "0.000001"
196 | otherwise = "1"
197 -}
198
199 ----------------------------------------------------------
200 -- | From data to Graph
201 data2graph :: ToComId a
202 => [(Text, Int)]
203 -> Map (Int, Int) Int
204 -> Map (Int, Int) Double
205 -> Map (Int, Int) Double
206 -> [a]
207 -> Graph
208 data2graph labels coocs bridge conf partitions = Graph { _graph_nodes = nodes
209 , _graph_edges = edges
210 , _graph_metadata = Nothing }
211 where
212
213 community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
214
215 nodes = map (setCoord ForceAtlas labels bridge)
216 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
217 , node_type = Terms -- or Unknown
218 , node_id = cs (show n)
219 , node_label = l
220 , node_x_coord = 0
221 , node_y_coord = 0
222 , node_attributes =
223 Attributes { clust_default = maybe 0 identity
224 (Map.lookup n community_id_by_node_id) }
225 , node_children = [] }
226 )
227 | (l, n) <- labels
228 , Set.member n $ Set.fromList
229 $ List.concat
230 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
231 $ Map.toList bridge
232 ]
233
234 edges = [ Edge { edge_source = cs (show s)
235 , edge_target = cs (show t)
236 , edge_weight = d
237 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
238 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
239 , edge_id = cs (show i)
240 }
241 | (i, ((s,t), d)) <- zip ([0..]::[Integer] )
242 (Map.toList bridge)
243 , s /= t, d > 0
244 ]
245
246
247 ------------------------------------------------------------------------
248
249 data Layout = KamadaKawai | ACP | ForceAtlas
250
251
252 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
253 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
254 where
255 (x,y) = f i
256
257
258 -- | ACP
259 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
260 setCoord l labels m (n,node) = node { node_x_coord = x
261 , node_y_coord = y
262 }
263 where
264 (x,y) = getCoord l labels m n
265
266
267 getCoord :: Ord a
268 => Layout
269 -> [(a, Int)]
270 -> Map (Int, Int) Double
271 -> Int
272 -> (Double, Double)
273 getCoord KamadaKawai _ _m _n = undefined -- layout m n
274
275 getCoord ForceAtlas _ _ n = (sin d, cos d)
276 where
277 d = fromIntegral n
278
279 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
280 $ Map.lookup n
281 $ pcaReduceTo (Dimension 2)
282 $ mapArray labels m
283 where
284 to2d :: Vec.Vector Double -> (Double, Double)
285 to2d v = (x',y')
286 where
287 ds = take 2 $ Vec.toList v
288 x' = head' "to2d" ds
289 y' = last' "to2d" ds
290
291 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
292 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
293 where
294 ns = map snd items
295
296 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
297 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
298 ------------------------------------------------------------------------
299
300 -- | KamadaKawai Layout
301 -- TODO TEST: check labels, nodeId and coordinates
302 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
303 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
304 where
305 coord :: (Map Int (Double,Double))
306 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
307 --p = Layout.defaultLGL
308 p = Layout.kamadaKawai
309 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
310 -----------------------------------------------------------------------------
311 -----------------------------------------------------------------------------
312 -----------------------------------------------------------------------------
313 -- Debug
314 {-
315 -- measure logDistributional
316 dataDebug = map2mat Square (0::Int) 19 dataBug'
317
318 dataBug' :: Map (Int, Int) Int
319 dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
320 -}