]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[graphql] fixes to the ngrams context endpoint
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
26 import Gargantext.Core.Methods.Similarities.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId)
29 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
30 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
31 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
32 import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
33 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
34 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
35 import Gargantext.Prelude
36 import Graph.Types (ClusterNode)
37 import IGraph.Random -- (Gen(..))
38 import Test.QuickCheck (elements)
39 import Test.QuickCheck.Arbitrary
40 import qualified Data.HashMap.Strict as HashMap
41 import qualified Data.List as List
42 import qualified Data.Map as Map
43 import qualified Data.Set as Set
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Text as Text
46 import qualified Data.Vector.Storable as Vec
47 import qualified Graph.BAC.ProxemyOptim as BAC
48 import qualified IGraph as Igraph
49 import qualified IGraph.Algorithms.Layout as Layout
50
51
52 data PartitionMethod = Spinglass | Confluence | Infomap
53 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
54 instance FromJSON PartitionMethod
55 instance ToJSON PartitionMethod
56 instance ToSchema PartitionMethod
57 instance Arbitrary PartitionMethod where
58 arbitrary = elements [ minBound .. maxBound ]
59
60 data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
61 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
62 instance FromJSON BridgenessMethod
63 instance ToJSON BridgenessMethod
64 instance ToSchema BridgenessMethod
65 instance Arbitrary BridgenessMethod where
66 arbitrary = elements [ minBound .. maxBound ]
67
68
69 -------------------------------------------------------------
70 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
71 -- defaultClustering x = pure $ BAC.defaultClustering x
72 defaultClustering x = spinglass 1 x
73
74 -------------------------------------------------------------
75 type Threshold = Double
76
77
78 cooc2graph' :: Ord t => Similarity
79 -> Double
80 -> Map (t, t) Int
81 -> Map (Index, Index) Double
82 cooc2graph' distance threshold myCooc
83 = Map.filter (> threshold)
84 $ mat2map
85 $ measure distance
86 $ case distance of
87 Conditional -> map2mat Triangle 0 tiSize
88 Distributional -> map2mat Square 0 tiSize
89 $ Map.filter (> 1) myCooc'
90
91 where
92 (ti, _) = createIndices myCooc
93 tiSize = Map.size ti
94 myCooc' = toIndex ti myCooc
95
96
97
98 -- coocurrences graph computation
99 cooc2graphWith :: PartitionMethod
100 -> BridgenessMethod
101 -> MultiPartite
102 -> Similarity
103 -> Threshold
104 -> Strength
105 -> HashMap (NgramsTerm, NgramsTerm) Int
106 -> IO Graph
107 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
108 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
109 cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
110 --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
111 -- TODO: change these options, or make them configurable in UI?
112
113
114 cooc2graphWith' :: Partitions
115 -> BridgenessMethod
116 -> MultiPartite
117 -> Similarity
118 -> Threshold
119 -> Strength
120 -> HashMap (NgramsTerm, NgramsTerm) Int
121 -> IO Graph
122 cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strength myCooc = do
123 let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
124 distanceMap `seq` diag `seq` ti `seq` return ()
125
126 --{- -- Debug
127 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
128 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
129 -- printDebug "similarities" similarities
130 --}
131
132 partitions <- if (Map.size distanceMap > 0)
133 then doPartitions distanceMap
134 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
135 , "Maybe you should add more Map Terms in your list"
136 , "Tutorial: link todo"
137 ]
138 length partitions `seq` return ()
139
140 let
141 !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
142 !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
143 then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
144 else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
145
146 pure $ data2graph multi ti diag bridgeness' confluence' partitions
147
148 type Reverse = Bool
149
150 doSimilarityMap :: Similarity
151 -> Threshold
152 -> Strength
153 -> HashMap (NgramsTerm, NgramsTerm) Int
154 -> ( Map (Int,Int) Double
155 , Map (Index, Index) Int
156 , Map NgramsTerm Index
157 )
158 doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
159 where
160 -- TODO remove below
161 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
162 $ Map.fromList
163 $ HashMap.toList myCooc
164
165 (ti, _it) = createIndices theMatrix
166 tiSize = Map.size ti
167
168 similarities = (\m -> m `seq` m)
169 $ (\m -> m `seq` measure Distributional m)
170 $ (\m -> m `seq` map2mat Square 0 tiSize m)
171 $ theMatrix `seq` toIndex ti theMatrix
172
173 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
174
175 distanceMap = Map.fromList
176 $ List.take links
177 $ (if strength == Weak then List.reverse else identity)
178 $ List.sortOn snd
179 $ Map.toList
180 $ edgesFilter
181 $ (\m -> m `seq` Map.filter (> threshold) m)
182 $ similarities `seq` mat2map similarities
183
184 doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
185 where
186 myCooc' = Map.fromList $ HashMap.toList myCooc
187 (ti, _it) = createIndices myCooc'
188 links = round (let n :: Double = fromIntegral (Map.size ti) in n * (log n)^(2::Int))
189 distanceMap = toIndex ti
190 $ Map.fromList
191 $ List.take links
192 $ (if strength == Weak then List.reverse else identity)
193 $ List.sortOn snd
194 $ HashMap.toList
195 $ HashMap.filter (> threshold)
196 $ conditional myCooc
197
198 ----------------------------------------------------------
199 -- | From data to Graph
200 type Occurrences = Int
201
202 nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType
203 nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
204 if HashSet.member t s1
205 then t1
206 else t2
207
208
209 data2graph :: MultiPartite
210 -> Map NgramsTerm Int
211 -> Map (Int, Int) Occurrences
212 -> Map (Int, Int) Double
213 -> Map (Int, Int) Double
214 -> [ClusterNode]
215 -> Graph
216 data2graph multi labels' occurences bridge conf partitions =
217 Graph { _graph_nodes = nodes
218 , _graph_edges = edges
219 , _graph_metadata = Nothing
220 }
221
222 where
223
224 nodes = map (setCoord ForceAtlas labels bridge)
225 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
226 , node_type = nodeTypeWith multi label
227 , node_id = (cs . show) n
228 , node_label = unNgramsTerm label
229 , node_x_coord = 0
230 , node_y_coord = 0
231 , node_attributes =
232 Attributes { clust_default = fromMaybe 0
233 (Map.lookup n community_id_by_node_id)
234 }
235 , node_children = []
236 }
237 )
238 | (label, n) <- labels
239 , Set.member n toKeep
240 ]
241
242 (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
243
244 edges = [ Edge { edge_source = cs (show s)
245 , edge_hidden = Nothing
246 , edge_target = cs (show t)
247 , edge_weight = weight
248 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
249 , edge_id = cs (show i)
250 }
251 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge'
252 , s /= t
253 , weight > 0
254 ]
255
256 community_id_by_node_id = Map.fromList
257 $ map nodeId2comId partitions
258
259 labels = Map.toList labels'
260
261
262 ------------------------------------------------------------------------
263
264 data Layout = KamadaKawai | ACP | ForceAtlas
265
266
267 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
268 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
269 where
270 (x,y) = f i
271
272
273 -- | ACP
274 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
275 setCoord l labels m (n,node) = node { node_x_coord = x
276 , node_y_coord = y
277 }
278 where
279 (x,y) = getCoord l labels m n
280
281
282 getCoord :: Ord a
283 => Layout
284 -> [(a, Int)]
285 -> Map (Int, Int) Double
286 -> Int
287 -> (Double, Double)
288 getCoord KamadaKawai _ _m _n = undefined -- layout m n
289
290 getCoord ForceAtlas _ _ n = (sin d, cos d)
291 where
292 d = fromIntegral n
293
294 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
295 $ Map.lookup n
296 $ pcaReduceTo (Dimension 2)
297 $ mapArray labels m
298 where
299 to2d :: Vec.Vector Double -> (Double, Double)
300 to2d v = (x',y')
301 where
302 ds = take 2 $ Vec.toList v
303 x' = head' "to2d" ds
304 y' = last' "to2d" ds
305
306 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
307 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
308 where
309 ns = map snd items
310
311 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
312 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
313 ------------------------------------------------------------------------
314
315 -- | KamadaKawai Layout
316 -- TODO TEST: check labels, nodeId and coordinates
317 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
318 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
319 where
320 coord :: (Map Int (Double,Double))
321 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
322 --p = Layout.defaultLGL
323 p = Layout.kamadaKawai
324 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
325
326 -----------------------------------------------------------------------------
327 -- MISC Tools
328 cooc2graph'' :: Ord t => Similarity
329 -> Double
330 -> Map (t, t) Int
331 -> Map (Index, Index) Double
332 cooc2graph'' distance threshold myCooc = neighbourMap
333 where
334 (ti, _) = createIndices myCooc
335 myCooc' = toIndex ti myCooc
336 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
337 distanceMat = measure distance matCooc
338 neighbourMap = filterByNeighbours threshold
339 $ mat2map distanceMat
340
341 -- Quentin
342 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
343 filterByNeighbours threshold distanceMap = filteredMap
344 where
345 indexes :: [Index]
346 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
347 filteredMap :: Map (Index, Index) Double
348 filteredMap = Map.fromList
349 $ List.concat
350 $ map (\idx ->
351 let selected = List.reverse
352 $ List.sortOn snd
353 $ Map.toList
354 $ Map.filter (> 0)
355 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
356 in List.take (round threshold) selected
357 ) indexes