]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[MERGE] graphql backend route for annuaire-contact
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Text as Text
44 import qualified Data.Vector.Storable as Vec
45 import qualified Graph.BAC.ProxemyOptim as BAC
46 import qualified IGraph as Igraph
47 import qualified IGraph.Algorithms.Layout as Layout
48
49
50 data PartitionMethod = Spinglass | Confluence
51 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
52 instance FromJSON PartitionMethod
53 instance ToJSON PartitionMethod
54 instance ToSchema PartitionMethod
55 instance Arbitrary PartitionMethod where
56 arbitrary = elements [ minBound .. maxBound ]
57
58
59 -------------------------------------------------------------
60 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
61 -- defaultClustering x = pure $ BAC.defaultClustering x
62 defaultClustering x = spinglass 1 x
63
64 -------------------------------------------------------------
65 type Threshold = Double
66
67
68 cooc2graph' :: Ord t => Distance
69 -> Double
70 -> Map (t, t) Int
71 -> Map (Index, Index) Double
72 cooc2graph' distance threshold myCooc
73 = Map.filter (> threshold)
74 $ mat2map
75 $ measure distance
76 $ case distance of
77 Conditional -> map2mat Triangle 0 tiSize
78 Distributional -> map2mat Square 0 tiSize
79 $ Map.filter (> 1) myCooc'
80
81 where
82 (ti, _) = createIndices myCooc
83 tiSize = Map.size ti
84 myCooc' = toIndex ti myCooc
85
86
87
88 -- coocurrences graph computation
89 cooc2graphWith :: PartitionMethod
90 -> Distance
91 -> Threshold
92 -> HashMap (NgramsTerm, NgramsTerm) Int
93 -> IO Graph
94 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
95 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
96
97
98 cooc2graphWith' :: ToComId a
99 => Partitions a
100 -> Distance
101 -> Threshold
102 -> HashMap (NgramsTerm, NgramsTerm) Int
103 -> IO Graph
104 cooc2graphWith' doPartitions distance threshold myCooc = do
105 let
106 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
107
108 --{- -- Debug
109 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
110 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
111 -- printDebug "similarities" similarities
112 --}
113
114 partitions <- if (Map.size distanceMap > 0)
115 then doPartitions distanceMap
116 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
117 , "Maybe you should add more Map Terms in your list"
118 , "Tutorial: link todo"
119 ]
120
121 let
122 nodesApprox :: Int
123 nodesApprox = n'
124 where
125 (as, bs) = List.unzip $ Map.keys distanceMap
126 n' = Set.size $ Set.fromList $ as <> bs
127 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
128 confluence' = confluence (Map.keys bridgeness') 3 True False
129
130 pure $ data2graph ti diag bridgeness' confluence' partitions
131
132
133 doDistanceMap :: Distance
134 -> Threshold
135 -> HashMap (NgramsTerm, NgramsTerm) Int
136 -> ( Map (Int,Int) Double
137 , Map (Index, Index) Int
138 , Map NgramsTerm Index
139 )
140 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
141 where
142 -- TODO remove below
143 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
144 $ Map.fromList
145 $ HashMap.toList myCooc
146
147 (ti, _it) = createIndices theMatrix
148 tiSize = Map.size ti
149
150 similarities = measure Distributional
151 $ map2mat Square 0 tiSize
152 $ toIndex ti theMatrix
153
154 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
155
156 distanceMap = Map.fromList
157 $ List.take links
158 $ List.reverse
159 $ List.sortOn snd
160 $ Map.toList
161 $ edgesFilter
162 $ Map.filter (> threshold)
163 $ mat2map similarities
164
165 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
166 where
167 myCooc' = Map.fromList $ HashMap.toList myCooc
168 (ti, _it) = createIndices myCooc'
169
170 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
171
172 distanceMap = toIndex ti
173 $ Map.fromList
174 $ List.take links
175 $ List.sortOn snd
176 $ HashMap.toList
177 $ HashMap.filter (> threshold)
178 $ conditional myCooc
179
180 ----------------------------------------------------------
181 -- | From data to Graph
182
183 type Occurrences = Int
184
185 data2graph :: ToComId a
186 => Map NgramsTerm Int
187 -> Map (Int, Int) Occurrences
188 -> Map (Int, Int) Double
189 -> Map (Int, Int) Double
190 -> [a]
191 -> Graph
192 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
193 , _graph_edges = edges
194 , _graph_metadata = Nothing
195 }
196 where
197
198 nodes = map (setCoord ForceAtlas labels bridge)
199 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
200 , node_type = Terms -- or Unknown
201 , node_id = cs (show n)
202 , node_label = unNgramsTerm l
203 , node_x_coord = 0
204 , node_y_coord = 0
205 , node_attributes = Attributes { clust_default = fromMaybe 0
206 (Map.lookup n community_id_by_node_id)
207 }
208 , node_children = []
209 }
210 )
211 | (l, n) <- labels
212 , Set.member n nodesWithScores
213 ]
214
215 edges = [ Edge { edge_source = cs (show s)
216 , edge_target = cs (show t)
217 , edge_weight = weight
218 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
219 , edge_id = cs (show i)
220 }
221 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
222 , s /= t
223 , weight > 0
224 ]
225
226 community_id_by_node_id = Map.fromList
227 $ map nodeId2comId partitions
228
229 labels = Map.toList labels'
230
231 nodesWithScores = Set.fromList
232 $ List.concat
233 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
234 $ Map.toList bridge
235
236
237 ------------------------------------------------------------------------
238
239 data Layout = KamadaKawai | ACP | ForceAtlas
240
241
242 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
243 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
244 where
245 (x,y) = f i
246
247
248 -- | ACP
249 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
250 setCoord l labels m (n,node) = node { node_x_coord = x
251 , node_y_coord = y
252 }
253 where
254 (x,y) = getCoord l labels m n
255
256
257 getCoord :: Ord a
258 => Layout
259 -> [(a, Int)]
260 -> Map (Int, Int) Double
261 -> Int
262 -> (Double, Double)
263 getCoord KamadaKawai _ _m _n = undefined -- layout m n
264
265 getCoord ForceAtlas _ _ n = (sin d, cos d)
266 where
267 d = fromIntegral n
268
269 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
270 $ Map.lookup n
271 $ pcaReduceTo (Dimension 2)
272 $ mapArray labels m
273 where
274 to2d :: Vec.Vector Double -> (Double, Double)
275 to2d v = (x',y')
276 where
277 ds = take 2 $ Vec.toList v
278 x' = head' "to2d" ds
279 y' = last' "to2d" ds
280
281 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
282 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
283 where
284 ns = map snd items
285
286 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
287 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
288 ------------------------------------------------------------------------
289
290 -- | KamadaKawai Layout
291 -- TODO TEST: check labels, nodeId and coordinates
292 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
293 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
294 where
295 coord :: (Map Int (Double,Double))
296 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
297 --p = Layout.defaultLGL
298 p = Layout.kamadaKawai
299 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
300
301 -----------------------------------------------------------------------------
302 -- MISC Tools
303 cooc2graph'' :: Ord t => Distance
304 -> Double
305 -> Map (t, t) Int
306 -> Map (Index, Index) Double
307 cooc2graph'' distance threshold myCooc = neighbourMap
308 where
309 (ti, _) = createIndices myCooc
310 myCooc' = toIndex ti myCooc
311 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
312 distanceMat = measure distance matCooc
313 neighbourMap = filterByNeighbours threshold
314 $ mat2map distanceMat
315
316 -- Quentin
317 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
318 filterByNeighbours threshold distanceMap = filteredMap
319 where
320 indexes :: [Index]
321 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
322 filteredMap :: Map (Index, Index) Double
323 filteredMap = Map.fromList
324 $ List.concat
325 $ map (\idx ->
326 let selected = List.reverse
327 $ List.sortOn snd
328 $ Map.toList
329 $ Map.filter (> 0)
330 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
331 in List.take (round threshold) selected
332 ) indexes
333
334
335
336
337