]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[FEAT] Backend NLP French tested
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
30 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
31 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
32 import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Text as Text
44 import qualified Data.Vector.Storable as Vec
45 import qualified Graph.BAC.ProxemyOptim as BAC
46 import qualified IGraph as Igraph
47 import qualified IGraph.Algorithms.Layout as Layout
48
49
50 data PartitionMethod = Spinglass | Confluence | Infomap
51 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
52 instance FromJSON PartitionMethod
53 instance ToJSON PartitionMethod
54 instance ToSchema PartitionMethod
55 instance Arbitrary PartitionMethod where
56 arbitrary = elements [ minBound .. maxBound ]
57
58
59 -------------------------------------------------------------
60 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
61 -- defaultClustering x = pure $ BAC.defaultClustering x
62 defaultClustering x = spinglass 1 x
63
64 -------------------------------------------------------------
65 type Threshold = Double
66
67
68 cooc2graph' :: Ord t => Distance
69 -> Double
70 -> Map (t, t) Int
71 -> Map (Index, Index) Double
72 cooc2graph' distance threshold myCooc
73 = Map.filter (> threshold)
74 $ mat2map
75 $ measure distance
76 $ case distance of
77 Conditional -> map2mat Triangle 0 tiSize
78 Distributional -> map2mat Square 0 tiSize
79 $ Map.filter (> 1) myCooc'
80
81 where
82 (ti, _) = createIndices myCooc
83 tiSize = Map.size ti
84 myCooc' = toIndex ti myCooc
85
86
87
88 -- coocurrences graph computation
89 cooc2graphWith :: PartitionMethod
90 -> Distance
91 -> Threshold
92 -> Strength
93 -> HashMap (NgramsTerm, NgramsTerm) Int
94 -> IO Graph
95 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
96 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
97 cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
98 -- TODO: change these options, or make them configurable in UI?
99
100
101 cooc2graphWith' :: ToComId a
102 => Partitions a
103 -> Distance
104 -> Threshold
105 -> Strength
106 -> HashMap (NgramsTerm, NgramsTerm) Int
107 -> IO Graph
108 cooc2graphWith' doPartitions distance threshold strength myCooc = do
109 let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
110 distanceMap `seq` diag `seq` ti `seq` return ()
111
112 --{- -- Debug
113 -- saveAsFileDebug "/tmp/distanceMap" distanceMap
114 -- saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
115 -- printDebug "similarities" similarities
116 --}
117
118 partitions <- if (Map.size distanceMap > 0)
119 then doPartitions distanceMap
120 else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
121 , "Maybe you should add more Map Terms in your list"
122 , "Tutorial: link todo"
123 ]
124 length partitions `seq` return ()
125 let
126 nodesApprox :: Int
127 nodesApprox = n'
128 where
129 (as, bs) = List.unzip $ Map.keys distanceMap
130 n' = Set.size $ Set.fromList $ as <> bs
131 !bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
132 !confluence' = Map.empty -- BAC.computeConfluences 3 (Map.keys bridgeness') True
133 pure $ data2graph ti diag bridgeness' confluence' partitions
134
135 type Reverse = Bool
136
137 doDistanceMap :: Distance
138 -> Threshold
139 -> Strength
140 -> HashMap (NgramsTerm, NgramsTerm) Int
141 -> ( Map (Int,Int) Double
142 , Map (Index, Index) Int
143 , Map NgramsTerm Index
144 )
145 doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
146 where
147 -- TODO remove below
148 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
149 $ Map.fromList
150 $ HashMap.toList myCooc
151
152 (ti, _it) = createIndices theMatrix
153 tiSize = Map.size ti
154
155 similarities = (\m -> m `seq` m)
156 $ (\m -> m `seq` measure Distributional m)
157 $ (\m -> m `seq` map2mat Square 0 tiSize m)
158 $ theMatrix `seq` toIndex ti theMatrix
159
160 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
161
162 distanceMap = Map.fromList
163 $ List.take links
164 $ (if strength == Weak then List.reverse else identity)
165 $ List.sortOn snd
166 $ Map.toList
167 $ edgesFilter
168 $ (\m -> m `seq` Map.filter (> threshold) m)
169 $ similarities `seq` mat2map similarities
170
171 doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
172 where
173 myCooc' = Map.fromList $ HashMap.toList myCooc
174 (ti, _it) = createIndices myCooc'
175
176 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
177
178 distanceMap = toIndex ti
179 $ Map.fromList
180 $ List.take links
181 $ (if strength == Weak then List.reverse else identity)
182 $ List.sortOn snd
183 $ HashMap.toList
184 $ HashMap.filter (> threshold)
185 $ conditional myCooc
186
187 ----------------------------------------------------------
188 -- | From data to Graph
189
190 type Occurrences = Int
191
192 data2graph :: ToComId a
193 => Map NgramsTerm Int
194 -> Map (Int, Int) Occurrences
195 -> Map (Int, Int) Double
196 -> Map (Int, Int) Double
197 -> [a]
198 -> Graph
199 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
200 , _graph_edges = edges
201 , _graph_metadata = Nothing
202 }
203 where
204
205 nodes = map (setCoord ForceAtlas labels bridge)
206 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
207 , node_type = Terms -- or Unknown
208 , node_id = cs (show n)
209 , node_label = unNgramsTerm l
210 , node_x_coord = 0
211 , node_y_coord = 0
212 , node_attributes = Attributes { clust_default = fromMaybe 0
213 (Map.lookup n community_id_by_node_id)
214 }
215 , node_children = []
216 }
217 )
218 | (l, n) <- labels
219 , Set.member n nodesWithScores
220 ]
221
222 edges = [ Edge { edge_source = cs (show s)
223 , edge_target = cs (show t)
224 , edge_weight = weight
225 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
226 , edge_id = cs (show i)
227 }
228 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
229 , s /= t
230 , weight > 0
231 ]
232
233 community_id_by_node_id = Map.fromList
234 $ map nodeId2comId partitions
235
236 labels = Map.toList labels'
237
238 nodesWithScores = Set.fromList
239 $ List.concat
240 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
241 $ Map.toList bridge
242
243
244 ------------------------------------------------------------------------
245
246 data Layout = KamadaKawai | ACP | ForceAtlas
247
248
249 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
250 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
251 where
252 (x,y) = f i
253
254
255 -- | ACP
256 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
257 setCoord l labels m (n,node) = node { node_x_coord = x
258 , node_y_coord = y
259 }
260 where
261 (x,y) = getCoord l labels m n
262
263
264 getCoord :: Ord a
265 => Layout
266 -> [(a, Int)]
267 -> Map (Int, Int) Double
268 -> Int
269 -> (Double, Double)
270 getCoord KamadaKawai _ _m _n = undefined -- layout m n
271
272 getCoord ForceAtlas _ _ n = (sin d, cos d)
273 where
274 d = fromIntegral n
275
276 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
277 $ Map.lookup n
278 $ pcaReduceTo (Dimension 2)
279 $ mapArray labels m
280 where
281 to2d :: Vec.Vector Double -> (Double, Double)
282 to2d v = (x',y')
283 where
284 ds = take 2 $ Vec.toList v
285 x' = head' "to2d" ds
286 y' = last' "to2d" ds
287
288 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
289 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
290 where
291 ns = map snd items
292
293 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
294 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
295 ------------------------------------------------------------------------
296
297 -- | KamadaKawai Layout
298 -- TODO TEST: check labels, nodeId and coordinates
299 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
300 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
301 where
302 coord :: (Map Int (Double,Double))
303 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
304 --p = Layout.defaultLGL
305 p = Layout.kamadaKawai
306 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
307
308 -----------------------------------------------------------------------------
309 -- MISC Tools
310 cooc2graph'' :: Ord t => Distance
311 -> Double
312 -> Map (t, t) Int
313 -> Map (Index, Index) Double
314 cooc2graph'' distance threshold myCooc = neighbourMap
315 where
316 (ti, _) = createIndices myCooc
317 myCooc' = toIndex ti myCooc
318 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
319 distanceMat = measure distance matCooc
320 neighbourMap = filterByNeighbours threshold
321 $ mat2map distanceMat
322
323 -- Quentin
324 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
325 filterByNeighbours threshold distanceMap = filteredMap
326 where
327 indexes :: [Index]
328 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
329 filteredMap :: Map (Index, Index) Double
330 filteredMap = Map.fromList
331 $ List.concat
332 $ map (\idx ->
333 let selected = List.reverse
334 $ List.sortOn snd
335 $ Map.toList
336 $ Map.filter (> 0)
337 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
338 in List.take (round threshold) selected
339 ) indexes
340
341
342